コード例 #1
0
def tabbed_parsing_character_generator(tmp_dir, train):
    """Build the example generator for one parsing pairs file.

    A single `text_encoder.ByteTextEncoder` instance is passed as both the
    source and the target vocabulary.

    Args:
        tmp_dir: Directory that holds the ``parsing_*.pairs`` file.
        train: Truthy selects ``parsing_train.pairs``; anything falsy
            selects ``parsing_dev.pairs``.

    Returns:
        Whatever `tabbed_generator` returns for the chosen file, the shared
        encoder, and `EOS`.
    """
    # One encoder instance is shared by both sides, exactly as the caller
    # of `tabbed_generator` receives it.
    shared_vocab = text_encoder.ByteTextEncoder()
    split = "train" if train else "dev"
    pairs_path = os.path.join(tmp_dir, "parsing_{0}.pairs".format(split))
    return tabbed_generator(pairs_path, shared_vocab, shared_vocab, EOS)
コード例 #2
0
ファイル: ice_parsing.py プロジェクト: ye-zi/tensor2tensor
def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix,
                                   source_vocab_size, target_vocab_size):
  """Build the example generator for one parsing pairs file using token vocabs.

  Two vocabularies are obtained through
  `generator_utils.get_or_generate_tabbed_vocab`, one requested with index 0
  and one with index 1 (presumably the source and target columns of the
  tab-separated file -- confirm against that helper).

  Args:
    data_dir: Forwarded as the first argument of the vocab helper.
    tmp_dir: Directory that holds the ``parsing_*.pairs`` file; also forwarded
      to the vocab helper.
    train: Truthy selects ``parsing_train.pairs``; anything falsy selects
      ``parsing_dev.pairs``.
    prefix: String prepended to both vocabulary file names.
    source_vocab_size: Size used for (and embedded in the file name of) the
      index-0 vocabulary.
    target_vocab_size: Size used for (and embedded in the file name of) the
      index-1 vocabulary.

  Returns:
    Whatever `tabbed_generator` returns for the chosen file, the two
    vocabularies, and `EOS`.
  """
  split = "train" if train else "dev"
  pairs_name = "parsing_{0}.pairs".format(split)

  # Each vocab file name is computed immediately before its own lookup so the
  # index-0 vocabulary is fully handled before the index-1 one is touched.
  source_vocab_name = prefix + "_source.tokens.vocab.%d" % source_vocab_size
  src_vocab = generator_utils.get_or_generate_tabbed_vocab(
      data_dir, tmp_dir, pairs_name, 0, source_vocab_name, source_vocab_size)

  target_vocab_name = prefix + "_target.tokens.vocab.%d" % target_vocab_size
  tgt_vocab = generator_utils.get_or_generate_tabbed_vocab(
      data_dir, tmp_dir, pairs_name, 1, target_vocab_name, target_vocab_size)

  pairs_path = os.path.join(tmp_dir, pairs_name)
  return tabbed_generator(pairs_path, src_vocab, tgt_vocab, EOS)