Beispiel #1
0
  def test_list_directory(self):
    """Test list directory.

    Builds a directory containing a file ("1"), a sub-directory with a nested
    file ("2/3") and an empty sub-directory ("4"), then checks that
    `gfile.ListDirectory` returns three entries that include "1", "2" and "4".
    """
    # Setup and check preconditions.
    dir_name = self.prefix() + ":///test_list_directory"
    gfile.MkDir(dir_name)
    gfile.MkDir(dir_name + "/2")
    file_names = [
        dir_name + "/1",
        dir_name + "/2/3",
    ]
    # Directory "4" is created only once, by the loop below.
    ch_dir_names = [
        dir_name + "/4",
    ]
    for file_name in file_names:
      with gfile.Open(file_name, mode="w") as w:
        w.write("")
    for ch_dir_name in ch_dir_names:
      gfile.MkDir(ch_dir_name)
    # The listing is compared against entry names, not full paths.
    expected_entries = ["1", "2", "4"]
    # Get list of files in directory.
    ls_result = gfile.ListDirectory(dir_name)
    # Check that list of files is correct.
    self.assertEqual(len(expected_entries), len(ls_result))
    for e in expected_entries:
      self.assertIn(e, ls_result, msg="Result doesn't contain '%s'" % e)
Beispiel #2
0
  def test_is_directory(self):
    """Test is directory.

    Creates one file and one directory under `<prefix>:///test_is_directory`
    and checks that `gfile.IsDirectory` is true only for the directory.
    """
    scheme = self.prefix()
    gfile.MkDir(scheme + ":///test_is_directory")
    sub_dir = scheme + ":///test_is_directory/1"
    plain_file = scheme + ":///test_is_directory/2"
    # Create the file first, then the directory.
    with gfile.Open(plain_file, mode="w") as writer:
      writer.write("")
    gfile.MkDir(sub_dir)
    # Only the directory may be reported as a directory.
    self.assertTrue(gfile.IsDirectory(sub_dir))
    self.assertFalse(gfile.IsDirectory(plain_file))
Beispiel #3
0
  def test_copy(self):
    """Test copy.

    Writes "42" to a source file, copies it with `gfile.Copy`, verifies that
    both files exist and that the copy holds "42", then removes both files.
    """
    scheme = self.prefix()
    gfile.MkDir(scheme + ":///test_copy")
    source = scheme + ":///test_copy/1"
    target = scheme + ":///test_copy/2"
    both = (source, target)
    # Precondition: neither file exists yet.
    for path in both:
      self.assertFalse(gfile.Exists(path))
    with gfile.Open(source, mode="w") as writer:
      writer.write("42")
    # Only the source exists before the copy.
    self.assertTrue(gfile.Exists(source))
    self.assertFalse(gfile.Exists(target))
    gfile.Copy(source, target)
    # After the copy both files exist and the target has the same content.
    for path in both:
      self.assertTrue(gfile.Exists(path))
    with gfile.Open(target, mode="r") as reader:
      copied = reader.read()
    self.assertEqual("42", copied)
    # Remove both files, then confirm they are gone.
    for path in both:
      gfile.Remove(path)
    for path in both:
      self.assertFalse(gfile.Exists(path))
Beispiel #4
0
 def _prepare(self):
     """Recreate this partition's output directory and open the input readers.

     Any existing directory for the partition is deleted recursively and
     created again. One `Merge.InputFileReader` is then registered per entry
     of `self._fpaths`, and `self._preload_queue()` is called last.
     """
     partition_dir = os.path.join(
         self._options.output_dir,
         common.partition_repr(self._partition_id))
     self._output_dir = partition_dir
     # Start from an empty partition directory.
     if gfile.Exists(partition_dir):
         gfile.DeleteRecursively(partition_dir)
     gfile.MkDir(self._options.output_dir)
     gfile.MkDir(partition_dir)
     for index, relative_path in enumerate(self._fpaths):
         full_path = "{}/{}".format(self._options.input_dir, relative_path)
         self._readers.append(
             Merge.InputFileReader(index, full_path, self._options))
         self._active_fpath.add(index)
         logging.info("Merge partition_id:%d, path:%s", self._partition_id,
                      full_path)
     self._preload_queue()
Beispiel #5
0
def main(FLAGS):
  """Load the data and KB files and write the inference JSON files.

  The inputs are loaded through `load_and_drop` and the results are written
  through `write_infer_json` into `FLAGS.output_dir`, which is created with
  `gfile.MkDir` when it is not already a directory.

  Args:
    FLAGS: parsed flag namespace. Reads `output_dir`, `data_file`, `kb_file`,
      `output_prefix`, `keep_incorrect` and `verbose`. A whitespace-only
      `output_prefix` is reset to '' on the object.
  """
  output_dir = FLAGS.output_dir
  if FLAGS.verbose:
    print('output_dir', output_dir)
    print('data_file', FLAGS.data_file)
    print('kb_file', FLAGS.kb_file)
    print('output_prefix', FLAGS.output_prefix)

  if not tf.io.gfile.isdir(output_dir):
    gfile.MkDir(output_dir)

  input_data_file = FLAGS.data_file
  input_kb_file = FLAGS.kb_file
  # A whitespace-only prefix means "no prefix"; any other value is kept as is.
  if not FLAGS.output_prefix.strip():
    FLAGS.output_prefix = ''
  prefix = FLAGS.output_prefix

  def output_path(tag, suffix):
    # Plain concatenation: the directory is never passed through str.format,
    # so braces in `output_dir` cannot be mistaken for format fields.
    return output_dir + '/' + prefix + tag + suffix

  # load data and do standardization
  raw_data, raw_kb = load_and_drop(
      input_data_file,
      input_kb_file,
      drop_incorrect=not FLAGS.keep_incorrect,
      verbose=FLAGS.verbose)

  write_infer_json(
      raw_data, raw_kb,
      output_path('_infer_src_', 'data.json'),
      output_path('_infer_tgt_', 'data.json'),
      output_path('_infer_', 'kb.json'))
Beispiel #6
0
  def test_rename_dir(self):
    """Test rename dir.

    Creates a directory, renames it with `gfile.Rename`, checks that only the
    new name exists and is a directory, then removes it.
    """
    scheme = self.prefix()
    gfile.MkDir(scheme + ":///test_rename_dir")
    old_dir = scheme + ":///test_rename_dir/1"
    new_dir = scheme + ":///test_rename_dir/2"
    gfile.MkDir(old_dir)
    gfile.Rename(old_dir, new_dir)
    # The old name must be gone; the new one must exist as a directory.
    self.assertFalse(gfile.Exists(old_dir))
    self.assertTrue(gfile.Exists(new_dir))
    self.assertTrue(gfile.IsDirectory(new_dir))
    # Remove the renamed directory and confirm it is gone.
    gfile.Remove(new_dir)
    self.assertFalse(gfile.Exists(new_dir))
Beispiel #7
0
    def test_make_dirs(self):
        """Test make dirs.

        Creates `igfs:///test_make_dirs/` with `gfile.MkDir` and removes it
        with `gfile.Remove`, checking `gfile.Exists` around each step.
        """
        target_dir = "igfs:///test_make_dirs/"
        # Precondition: nothing exists at the target path.
        self.assertFalse(gfile.Exists(target_dir))
        gfile.MkDir(target_dir)
        # The directory is visible once created.
        self.assertTrue(gfile.Exists(target_dir))
        gfile.Remove(target_dir)
        # ... and gone again after removal.
        self.assertFalse(gfile.Exists(target_dir))
Beispiel #8
0
  def test_remove(self):
    """Test remove.

    Writes an empty file, deletes it with `gfile.Remove` and checks with
    `gfile.Exists` that it is present before and absent after the removal.
    """
    scheme = self.prefix()
    gfile.MkDir(scheme + ":///test_remove")
    target = scheme + ":///test_remove/1"
    # Precondition: the file does not exist yet.
    self.assertFalse(gfile.Exists(target))
    with gfile.Open(target, mode="w") as writer:
      writer.write("")
    self.assertTrue(gfile.Exists(target))
    gfile.Remove(target)
    # The file must be gone after removal.
    self.assertFalse(gfile.Exists(target))
Beispiel #9
0
    def test_delete_recursively(self):
        """Test delete recursively.

        Creates a directory with one empty file in it, deletes the directory
        with `gfile.DeleteRecursively` and checks that neither path exists
        afterwards.
        """
        root = "igfs:///test_delete_recursively/"
        child = "igfs:///test_delete_recursively/1"
        paths = (root, child)
        # Precondition: neither the directory nor the file exists.
        for path in paths:
            self.assertFalse(gfile.Exists(path))
        gfile.MkDir(root)
        with gfile.Open(child, mode="w") as writer:
            writer.write("")
        for path in paths:
            self.assertTrue(gfile.Exists(path))
        gfile.DeleteRecursively(root)
        # Both the directory and its content must be gone.
        for path in paths:
            self.assertFalse(gfile.Exists(path))
Beispiel #10
0
  def test_rename_file(self):
    """Test rename file.

    Writes "42" to a source file, renames it with `gfile.Rename`, checks that
    only the new name exists and still holds "42", then removes it.
    """
    scheme = self.prefix()
    gfile.MkDir(scheme + ":///test_rename_file")
    old_name = scheme + ":///test_rename_file/1"
    new_name = scheme + ":///test_rename_file/2"
    with gfile.Open(old_name, mode="w") as writer:
      writer.write("42")
    self.assertTrue(gfile.Exists(old_name))
    gfile.Rename(old_name, new_name)
    # Only the new name may exist, and its content must be unchanged.
    self.assertFalse(gfile.Exists(old_name))
    self.assertTrue(gfile.Exists(new_name))
    with gfile.Open(new_name, mode="r") as reader:
      content = reader.read()
    self.assertEqual("42", content)
    # Remove the renamed file and confirm it is gone.
    gfile.Remove(new_name)
    self.assertFalse(gfile.Exists(new_name))
Beispiel #11
0
    def test_list_directory(self):
        """Test list directory.

        Writes the files "1" and "2/3" and creates the directory "4" under
        `igfs:///test_list_directory/`, then checks that
        `gfile.ListDirectory` returns three entries that include "1", "2"
        and "4".
        """
        # Setup and check preconditions.
        dir_name = "igfs:///test_list_directory/"
        file_names = [
            "igfs:///test_list_directory/1", "igfs:///test_list_directory/2/3"
        ]
        ch_dir_names = [
            "igfs:///test_list_directory/4",
        ]
        for file_name in file_names:
            with gfile.Open(file_name, mode="w") as w:
                w.write("")
        for ch_dir_name in ch_dir_names:
            gfile.MkDir(ch_dir_name)
        # The listing is compared against entry names, not full paths.
        expected_entries = ["1", "2", "4"]
        # Get list of files in directory.
        ls_result = gfile.ListDirectory(dir_name)
        # Check that list of files is correct.
        self.assertEqual(len(expected_entries), len(ls_result))
        for e in expected_entries:
            # assertIn reports the actual listing when the entry is missing.
            self.assertIn(e, ls_result)
Beispiel #12
0
  def test_write_read_file(self):
    """Test write/read file.

    Writes ten identical rows to a file, reads them back with `readlines`
    and checks the row count and each row's content, then removes the file.
    """
    scheme = self.prefix()
    gfile.MkDir(scheme + ":///test_write_read_file")
    path = scheme + ":///test_write_read_file/1"
    row_count = 10
    # Precondition: the file does not exist yet.
    self.assertFalse(gfile.Exists(path))
    with gfile.Open(path, mode="w") as writer:
      for _ in range(row_count):
        writer.write("This is row\n")
    with gfile.Open(path, mode="r") as reader:
      read_back = reader.readlines()
    # Same number of rows, and every row matches what was written.
    self.assertEqual(row_count, len(read_back))
    for line in read_back:
      self.assertEqual("This is row\n", line)
    # Remove the file and confirm it is gone.
    gfile.Remove(path)
    self.assertFalse(gfile.Exists(path))
Beispiel #13
0
def make_path(path):
  """Create `path` with `gfile.MkDir` unless `gfile.Exists` reports it."""
  if gfile.Exists(path):
    return
  gfile.MkDir(path)
Beispiel #14
0
 def setUp(self): # pylint: disable=invalid-name
   """Set IGNITE_PORT to 10801 and create the `ggfs:///` root directory."""
   os.environ.update(IGNITE_PORT="10801")
   gfile.MkDir("ggfs:///")
Beispiel #15
0
def main(FLAGS):
    """Load the input data/KB files and write the outputs of each job.

    The job list comes from `process_job_type`. Depending on it, train, eval,
    infer and self-play files are written into `FLAGS.output_dir`, which is
    created with `gfile.MkDir` when it is not already a directory. When
    `FLAGS.gen_special_token` is set, the special-token vocabulary file is
    written as well.

    Args:
        FLAGS: parsed flag namespace. Besides the values echoed in verbose
            mode, reads `job_type`, `gen_cat`, `nltk_data`,
            `self_play_start_turn` and `gen_special_token`. A
            whitespace-only `output_prefix` is reset to '' on the object.
    """
    all_jobs = process_job_type(FLAGS.job_type, FLAGS.input_type)
    output_dir = FLAGS.output_dir
    if FLAGS.verbose:
        print('all_jobs', all_jobs)
        print('input_type', FLAGS.input_type)
        print('output_dir', output_dir)
        print('data_file', FLAGS.data_file)
        print('kb_file', FLAGS.kb_file)
        print('output_prefix', FLAGS.output_prefix)
        print('skip_standardize', FLAGS.skip_standardize)
        print('keep_incorrect', FLAGS.keep_incorrect)
        print('word_cutoff', FLAGS.word_cutoff)
        print('gen_voc', FLAGS.gen_voc)
        print('infer_src_data_file', FLAGS.infer_src_data_file)
        print('infer_kb_file', FLAGS.infer_kb_file)

    if not tf.io.gfile.isdir(output_dir):
        gfile.MkDir(output_dir)

    input_data_file = FLAGS.data_file
    input_kb_file = FLAGS.kb_file
    # A whitespace-only prefix means "no prefix"; other values are kept as is.
    if not FLAGS.output_prefix.strip():
        FLAGS.output_prefix = ''
    prefix = FLAGS.output_prefix

    def prefixed(suffix):
        # Path of the output file `<prefix><suffix>` inside output_dir. Plain
        # concatenation keeps the directory out of str.format, so braces in
        # `output_dir` cannot be mistaken for format fields.
        return output_dir + '/' + prefix + suffix

    # The vocabulary file has a fixed name and does not carry the prefix.
    output_vab = output_dir + '/vocab.txt'
    output_all_vab = prefixed('.full.vocab')
    all_token_file = prefixed('.special.vocab')
    cat_files = [
        prefixed('.firstname.cat'),
        prefixed('.lastname.cat'),
        prefixed('.flight.cat'),
        prefixed('.status.cat'),
    ]

    nltk.data.path.append(FLAGS.nltk_data)

    infer_flag_exists = FLAGS.infer_src_data_file or FLAGS.infer_kb_file

    if any(j != 'infer' for j in all_jobs) or not infer_flag_exists:
        # We need to process the default json
        data = load_data_from_jsons(FLAGS, input_data_file, input_kb_file,
                                    output_vab, output_all_vab, FLAGS.gen_cat,
                                    cat_files)

    if 'infer' in all_jobs and infer_flag_exists:
        # We need to process alternate infer json
        alt_infer_data = load_data_from_jsons_stream(
            FLAGS, FLAGS.infer_src_data_file, FLAGS.infer_kb_file, None, None,
            False, [], FLAGS.self_play_start_turn)
    if 'train' in all_jobs:
        if FLAGS.verbose:
            print('writing train data')
        write_data(data, prefixed('.data'), prefixed('.kb'))
    if 'eval' in all_jobs:
        if FLAGS.verbose:
            print('writing eval data')
        write_data(data, prefixed('.eval.data'), prefixed('.eval.kb'))
    if 'infer' in all_jobs:
        if FLAGS.verbose:
            print('writing infer data')
        if infer_flag_exists:
            write_data(alt_infer_data,
                       prefixed('.infer.src.data'),
                       prefixed('.infer.kb'),
                       alt_infer=True)
        else:
            write_completion(
                data,
                prefixed('.infer.src.data'),
                prefixed('.infer.tar.data'),
                prefixed('.infer.kb'))
    if 'sp-train' in all_jobs:
        if FLAGS.verbose:
            print('writing self play training data')
        write_self_play(
            data,
            prefixed('.selfplay.data'),
            prefixed('.selfplay.kb'))
    if 'sp-eval' in all_jobs:
        if FLAGS.verbose:
            print('writing self play eval data')
        write_self_play(
            data,
            prefixed('.selfplay.eval.data'),
            prefixed('.selfplay.eval.kb'))

    if FLAGS.gen_special_token:
        # write all token file.
        # The context manager closes the file even if a write raises.
        with gfile.Open(all_token_file, 'w') as f_tokens:
            for token in list_of_action_tokens_except_name:
                f_tokens.write(token + '\n')