def test_list_directory(self):
    """Check that ListDirectory returns the direct children of a directory.

    The fixture holds file ``1``, directory ``2`` (containing file ``3``)
    and directory ``4``; the listing of the root is expected to have three
    entries named ``1``, ``2`` and ``4``.
    """
    # Create the root directory and both sub-directories first.
    for location in (":///test_list_directory",
                     ":///test_list_directory/2",
                     ":///test_list_directory/4"):
        gfile.MkDir(self.prefix() + location)

    root = self.prefix() + ":///test_list_directory"
    files = [
        self.prefix() + ":///test_list_directory/1",
        self.prefix() + ":///test_list_directory/2/3",
    ]
    child_dirs = [
        self.prefix() + ":///test_list_directory/4",
    ]

    # Materialize the (empty) files.
    for path in files:
        with gfile.Open(path, mode="w") as writer:
            writer.write("")
    # "/4" already exists at this point; MkDir is issued for it once more.
    for path in child_dirs:
        gfile.MkDir(path)
    expected_entries = files + child_dirs

    # List the root directory.
    listing = gfile.ListDirectory(root)

    # One entry per expected path, identified by its base name.
    self.assertEqual(len(expected_entries), len(listing))
    for entry in ["1", "2", "4"]:
        present = entry in listing
        self.assertTrue(present, msg="Result doesn't contain '%s'" % entry)
def test_is_directory(self):
    """Check that IsDirectory is true for a directory and false for a file."""
    # Parent directory that will hold one sub-directory and one file.
    gfile.MkDir(self.prefix() + ":///test_is_directory")
    sub_dir = self.prefix() + ":///test_is_directory/1"
    plain_file = self.prefix() + ":///test_is_directory/2"

    # Create the empty file first, then the sub-directory.
    with gfile.Open(plain_file, mode="w") as writer:
        writer.write("")
    gfile.MkDir(sub_dir)

    # The sub-directory must be reported as a directory...
    is_dir = gfile.IsDirectory(sub_dir)
    self.assertTrue(is_dir)
    # ...and the file must not.
    is_dir = gfile.IsDirectory(plain_file)
    self.assertFalse(is_dir)
def test_copy(self):
    """Check that Copy duplicates a file and leaves the source in place.

    Writes ``"42"`` to a source file, copies it, verifies both files exist
    and that the copy holds ``"42"``, then removes both files.
    """
    # Fixture: parent directory plus two paths that must not exist yet.
    gfile.MkDir(self.prefix() + ":///test_copy")
    source = self.prefix() + ":///test_copy/1"
    target = self.prefix() + ":///test_copy/2"
    both = (source, target)
    for path in both:
        self.assertFalse(gfile.Exists(path))

    # Only the source exists after writing it.
    with gfile.Open(source, mode="w") as writer:
        writer.write("42")
    self.assertTrue(gfile.Exists(source))
    self.assertFalse(gfile.Exists(target))

    # Perform the copy.
    gfile.Copy(source, target)

    # Both files exist and the copy carries the written payload.
    for path in both:
        self.assertTrue(gfile.Exists(path))
    with gfile.Open(target, mode="r") as reader:
        copied = reader.read()
    self.assertEqual("42", copied)

    # Clean up both files.
    for path in both:
        gfile.Remove(path)

    # Neither file may remain.
    for path in both:
        self.assertFalse(gfile.Exists(path))
def _prepare(self):
    """Reset the partition output directory and register the input readers.

    Steps, in order:
      * set ``self._output_dir`` to ``<options.output_dir>/<partition repr>``
        and delete it recursively if it already exists;
      * call ``gfile.MkDir`` for the base output directory and then for the
        partition directory;
      * for each entry of ``self._fpaths`` build an ``InputFileReader`` on
        ``<options.input_dir>/<entry>``, append it to ``self._readers`` and
        add its index to ``self._active_fpath``;
      * call ``self._preload_queue()``.
    """
    partition_dir = os.path.join(
        self._options.output_dir,
        common.partition_repr(self._partition_id),
    )
    self._output_dir = partition_dir

    # Start from a clean partition directory.
    already_there = gfile.Exists(partition_dir)
    if already_there:
        gfile.DeleteRecursively(partition_dir)
    gfile.MkDir(self._options.output_dir)
    gfile.MkDir(partition_dir)

    # One reader per input file; the enumeration index is the reader id.
    for index, relative_path in enumerate(self._fpaths):
        full_path = "{}/{}".format(self._options.input_dir, relative_path)
        self._readers.append(
            Merge.InputFileReader(index, full_path, self._options))
        self._active_fpath.add(index)
        logging.info("Merge partition_id:%d, path:%s",
                     self._partition_id, full_path)

    self._preload_queue()
def main(FLAGS):
    """Load the raw data/KB files and write the inference JSON files.

    Creates ``FLAGS.output_dir`` when it is not already a directory, loads
    the inputs with ``load_and_drop`` and hands the result to
    ``write_infer_json`` together with three output paths inside the output
    directory: ``<prefix>_infer_src_data.json``,
    ``<prefix>_infer_tgt_data.json`` and ``<prefix>_infer_kb.json``.

    Args:
        FLAGS: object exposing the attributes ``output_dir``, ``verbose``,
            ``data_file``, ``kb_file``, ``output_prefix`` and
            ``keep_incorrect``. ``output_prefix`` is reset to ``''`` in
            place when it contains only whitespace.
    """
    output_dir = FLAGS.output_dir
    if FLAGS.verbose:
        print('output_dir', output_dir)
        print('data_file', FLAGS.data_file)
        print('kb_file', FLAGS.kb_file)
        print('output_prefix', FLAGS.output_prefix)

    if not tf.io.gfile.isdir(output_dir):
        gfile.MkDir(output_dir)

    input_data_file = FLAGS.data_file
    input_kb_file = FLAGS.kb_file

    # Normalize a whitespace-only prefix to the empty string.
    if not FLAGS.output_prefix.strip():
        FLAGS.output_prefix = ''

    def output_path(tag, suffix):
        # output_dir is passed as a format *argument* rather than being
        # concatenated into the template, so braces in the directory name
        # are never interpreted as replacement fields.
        return '{0}/{1}{2}'.format(output_dir, FLAGS.output_prefix + tag,
                                   suffix)

    # Load data and do standardization.
    raw_data, raw_kb = load_and_drop(
        input_data_file,
        input_kb_file,
        drop_incorrect=not FLAGS.keep_incorrect,
        verbose=FLAGS.verbose)

    write_infer_json(
        raw_data,
        raw_kb,
        output_path('_infer_src_', 'data.json'),
        output_path('_infer_tgt_', 'data.json'),
        output_path('_infer_', 'kb.json'))
def test_rename_dir(self):
    """Check that Rename moves a directory to its new name."""
    # Fixture: a parent directory containing the directory to rename.
    gfile.MkDir(self.prefix() + ":///test_rename_dir")
    old_path = self.prefix() + ":///test_rename_dir/1"
    new_path = self.prefix() + ":///test_rename_dir/2"
    gfile.MkDir(old_path)

    # Perform the rename.
    gfile.Rename(old_path, new_path)

    # The old name is gone; the new name exists and is a directory.
    self.assertFalse(gfile.Exists(old_path))
    for check in (gfile.Exists, gfile.IsDirectory):
        self.assertTrue(check(new_path))

    # Clean up the renamed directory.
    gfile.Remove(new_path)

    # Nothing may remain under the new name.
    self.assertFalse(gfile.Exists(new_path))
def test_make_dirs(self):
    """Check that MkDir creates a directory and Remove deletes it again."""
    target = "igfs:///test_make_dirs/"

    # Precondition: the directory does not exist yet.
    exists_before = gfile.Exists(target)
    self.assertFalse(exists_before)

    # Create it and confirm it is visible.
    gfile.MkDir(target)
    exists_after_create = gfile.Exists(target)
    self.assertTrue(exists_after_create)

    # Remove it and confirm it is gone.
    gfile.Remove(target)
    exists_after_remove = gfile.Exists(target)
    self.assertFalse(exists_after_remove)
def test_remove(self):
    """Check that Remove deletes an existing file."""
    # Fixture: parent directory and a file path that must not exist yet.
    gfile.MkDir(self.prefix() + ":///test_remove")
    victim = self.prefix() + ":///test_remove/1"
    missing_at_start = not gfile.Exists(victim)
    self.assertTrue(missing_at_start) if False else self.assertFalse(
        gfile.Exists(victim)) if False else None
    self.assertFalse(not missing_at_start)

    # Create the (empty) file and make sure it is visible.
    with gfile.Open(victim, mode="w") as writer:
        writer.write("")
    self.assertTrue(gfile.Exists(victim))

    # Delete it.
    gfile.Remove(victim)

    # The file must be gone.
    self.assertFalse(gfile.Exists(victim))
def test_delete_recursively(self):
    """Check that DeleteRecursively removes a directory and its contents."""
    folder = "igfs:///test_delete_recursively/"
    inner_file = "igfs:///test_delete_recursively/1"
    targets = (folder, inner_file)

    # Precondition: neither the directory nor the file exists.
    for path in targets:
        self.assertFalse(gfile.Exists(path))

    # Build the directory with one empty file inside.
    gfile.MkDir(folder)
    with gfile.Open(inner_file, mode="w") as writer:
        writer.write("")
    for path in targets:
        self.assertTrue(gfile.Exists(path))

    # Delete the directory together with its contents.
    gfile.DeleteRecursively(folder)

    # Both the directory and the file must be gone.
    for path in targets:
        self.assertFalse(gfile.Exists(path))
def test_rename_file(self):
    """Check that Rename moves a file and keeps its content.

    Writes ``"42"`` to the source file, renames it, verifies that only the
    new name exists and still reads back ``"42"``, then removes the file.
    """
    # Fixture: parent directory and a source file holding "42".
    gfile.MkDir(self.prefix() + ":///test_rename_file")
    old_name = self.prefix() + ":///test_rename_file/1"
    new_name = self.prefix() + ":///test_rename_file/2"
    with gfile.Open(old_name, mode="w") as writer:
        writer.write("42")
    self.assertTrue(gfile.Exists(old_name))

    # Perform the rename.
    gfile.Rename(old_name, new_name)

    # Only the new name is visible, and the payload is intact.
    self.assertFalse(gfile.Exists(old_name))
    self.assertTrue(gfile.Exists(new_name))
    with gfile.Open(new_name, mode="r") as reader:
        payload = reader.read()
    self.assertEqual("42", payload)

    # Clean up.
    gfile.Remove(new_name)

    # The renamed file must be gone as well.
    self.assertFalse(gfile.Exists(new_name))
def test_list_directory(self):
    """Check that ListDirectory returns the direct children of a directory.

    The fixture under ``igfs:///test_list_directory/`` consists of file
    ``1``, file ``2/3`` and directory ``4``; the listing of the root is
    expected to have three entries named ``1``, ``2`` and ``4``.
    """
    # Setup and check preconditions.
    dir_name = "igfs:///test_list_directory/"
    file_names = [
        "igfs:///test_list_directory/1",
        "igfs:///test_list_directory/2/3",
    ]
    ch_dir_names = [
        "igfs:///test_list_directory/4",
    ]
    for file_name in file_names:
        with gfile.Open(file_name, mode="w") as w:
            w.write("")
    for ch_dir_name in ch_dir_names:
        gfile.MkDir(ch_dir_name)
    ls_expected_result = file_names + ch_dir_names

    # Get list of files in directory.
    ls_result = gfile.ListDirectory(dir_name)

    # Check that list of files is correct.
    self.assertEqual(len(ls_expected_result), len(ls_result))
    for e in ["1", "2", "4"]:
        # assertIn reports both the missing entry and the actual listing
        # on failure, instead of a bare "False is not true".
        self.assertIn(e, ls_result, msg="Result doesn't contain '%s'" % e)
def test_write_read_file(self):
    """Check that lines written to a file are read back unchanged.

    Writes ten identical rows, reads them back with ``readlines`` and
    compares count and content, then removes the file.
    """
    # Fixture: parent directory and a file path that must not exist yet.
    gfile.MkDir(self.prefix() + ":///test_write_read_file")
    path = self.prefix() + ":///test_write_read_file/1"
    row_count = 10
    self.assertFalse(gfile.Exists(path))

    # Write the rows one call at a time.
    with gfile.Open(path, mode="w") as writer:
        for _ in range(row_count):
            writer.write("This is row\n")

    # Read everything back.
    with gfile.Open(path, mode="r") as reader:
        read_back = reader.readlines()

    # Same number of rows, each identical to what was written.
    self.assertEqual(row_count, len(read_back))
    for index in range(row_count):
        self.assertEqual("This is row\n", read_back[index])

    # Clean up.
    gfile.Remove(path)

    # The file must be gone.
    self.assertFalse(gfile.Exists(path))
def make_path(path):
    """Create directory ``path`` with ``gfile.MkDir`` unless it already exists.

    Args:
        path: location checked with ``gfile.Exists`` and, when absent,
            passed to ``gfile.MkDir``.
    """
    if gfile.Exists(path):
        # Nothing to do: the path is already present.
        return
    gfile.MkDir(path)
def setUp(self):  # pylint: disable=invalid-name
    """Set ``IGNITE_PORT`` to 10801 and create the ``ggfs:///`` root."""
    ignite_port = '10801'
    os.environ["IGNITE_PORT"] = ignite_port

    root_uri = "ggfs:///"
    gfile.MkDir(root_uri)
def main(FLAGS):
    """Load the dialogue data and write the datasets for the requested jobs.

    The job list comes from ``process_job_type(FLAGS.job_type,
    FLAGS.input_type)``. Depending on which of ``'train'``, ``'eval'``,
    ``'infer'``, ``'sp-train'`` and ``'sp-eval'`` it contains, the matching
    ``write_*`` helper is called with output paths inside
    ``FLAGS.output_dir``. When ``FLAGS.gen_special_token`` is set, every
    entry of ``list_of_action_tokens_except_name`` is additionally written,
    one per line, to ``<prefix>.special.vocab``.

    Args:
        FLAGS: object exposing the attributes ``job_type``, ``input_type``,
            ``output_dir``, ``verbose``, ``data_file``, ``kb_file``,
            ``output_prefix``, ``infer_src_data_file``, ``infer_kb_file``,
            ``nltk_data``, ``gen_cat``, ``self_play_start_turn`` and
            ``gen_special_token``; ``skip_standardize``, ``keep_incorrect``,
            ``word_cutoff`` and ``gen_voc`` are read only for verbose
            output. ``output_prefix`` is reset to ``''`` in place when it
            contains only whitespace.
    """
    all_jobs = process_job_type(FLAGS.job_type, FLAGS.input_type)
    output_dir = FLAGS.output_dir
    if FLAGS.verbose:
        print('all_jobs', all_jobs)
        print('input_type', FLAGS.input_type)
        print('output_dir', output_dir)
        for flag_name in ('data_file', 'kb_file', 'output_prefix',
                          'skip_standardize', 'keep_incorrect',
                          'word_cutoff', 'gen_voc', 'infer_src_data_file',
                          'infer_kb_file'):
            print(flag_name, getattr(FLAGS, flag_name))

    if not tf.io.gfile.isdir(output_dir):
        gfile.MkDir(output_dir)

    input_data_file = FLAGS.data_file
    input_kb_file = FLAGS.kb_file

    # Normalize a whitespace-only prefix to the empty string.
    if not FLAGS.output_prefix.strip():
        FLAGS.output_prefix = ''

    def prefixed(suffix):
        # output_dir is a format *argument*, never part of the template, so
        # braces in the directory name are not treated as replacement
        # fields.
        return '{0}/{1}{2}'.format(output_dir, FLAGS.output_prefix, suffix)

    def data_path(tag):
        return prefixed(tag + 'data')

    def kb_path(tag):
        return prefixed(tag + 'kb')

    output_vab = output_dir + '/vocab.txt'
    output_all_vab = prefixed('.full.vocab')
    all_token_file = prefixed('.special.vocab')
    cat_files = [
        prefixed('.firstname.cat'),
        prefixed('.lastname.cat'),
        prefixed('.flight.cat'),
        prefixed('.status.cat'),
    ]

    nltk.data.path.append(FLAGS.nltk_data)

    infer_flag_exists = FLAGS.infer_src_data_file or FLAGS.infer_kb_file
    if any(j != 'infer' for j in all_jobs) or not infer_flag_exists:
        # We need to process the default json.
        data = load_data_from_jsons(FLAGS, input_data_file, input_kb_file,
                                    output_vab, output_all_vab,
                                    FLAGS.gen_cat, cat_files)
    if 'infer' in all_jobs and infer_flag_exists:
        # We need to process the alternate infer json.
        alt_infer_data = load_data_from_jsons_stream(
            FLAGS, FLAGS.infer_src_data_file, FLAGS.infer_kb_file, None,
            None, False, [], FLAGS.self_play_start_turn)

    if 'train' in all_jobs:
        if FLAGS.verbose:
            print('writing train data')
        write_data(data, data_path('.'), kb_path('.'))

    if 'eval' in all_jobs:
        if FLAGS.verbose:
            print('writing eval data')
        write_data(data, data_path('.eval.'), kb_path('.eval.'))

    if 'infer' in all_jobs:
        if FLAGS.verbose:
            print('writing infer data')
        if infer_flag_exists:
            write_data(alt_infer_data,
                       data_path('.infer.src.'),
                       kb_path('.infer.'),
                       alt_infer=True)
        else:
            write_completion(data,
                             data_path('.infer.src.'),
                             data_path('.infer.tar.'),
                             kb_path('.infer.'))

    if 'sp-train' in all_jobs:
        if FLAGS.verbose:
            print('writing self play training data')
        write_self_play(data, data_path('.selfplay.'),
                        kb_path('.selfplay.'))

    if 'sp-eval' in all_jobs:
        if FLAGS.verbose:
            print('writing self play eval data')
        write_self_play(data, data_path('.selfplay.eval.'),
                        kb_path('.selfplay.eval.'))

    if FLAGS.gen_special_token:
        # Write the token file; the context manager closes the handle even
        # when a write raises.
        with gfile.Open(all_token_file, 'w') as f_tokens:
            for token in list_of_action_tokens_except_name:
                f_tokens.write(token + '\n')