def testGetOrGenerateTxtVocab(self):
    """get_or_generate_txt_vocab writes the vocab file once and reuses it."""
    data_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    test_file = os.path.join(self.get_temp_dir(), "test.txt")
    vocab_path = os.path.join(data_dir, "test.voc")

    # Write a small corpus to build the vocab from.
    with tf.gfile.Open(test_file, "w") as outfile:
        outfile.write("a b c\n")
        outfile.write("d e f\n")

    # First call generates the vocab file on disk.
    first_vocab = generator_utils.get_or_generate_txt_vocab(
        data_dir, "test.voc", 20, test_file)
    self.assertTrue(tf.gfile.Exists(vocab_path))
    self.assertIsNotNone(first_vocab)

    # Grow the corpus; if the vocab were rebuilt it would now differ.
    with tf.gfile.Open(test_file, "a") as outfile:
        outfile.write("g h i\n")
    second_vocab = generator_utils.get_or_generate_txt_vocab(
        data_dir, "test.voc", 20, test_file)
    self.assertTrue(tf.gfile.Exists(vocab_path))
    self.assertIsNotNone(second_vocab)
    # Second call must have read the existing file, not regenerated it.
    self.assertEqual(first_vocab.dump(), second_vocab.dump())
# Ejemplo n.º 2
# 0
    def testGetOrGenerateTxtVocab(self):
        """Vocab generation is idempotent: a later call reads the saved file."""
        data_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        test_file = os.path.join(self.get_temp_dir(), "test.txt")

        # Seed corpus for the initial vocab build.
        with tf.gfile.Open(test_file, "w") as corpus:
            corpus.write("a b c\n")
            corpus.write("d e f\n")

        vocab_before = generator_utils.get_or_generate_txt_vocab(
            data_dir, "test.voc", 20, test_file)
        self.assertTrue(tf.gfile.Exists(os.path.join(data_dir, "test.voc")))
        self.assertIsNotNone(vocab_before)

        # Appending new tokens would change a freshly built vocab, so equality
        # below proves the second call loaded the cached file instead.
        with tf.gfile.Open(test_file, "a") as corpus:
            corpus.write("g h i\n")

        vocab_after = generator_utils.get_or_generate_txt_vocab(
            data_dir, "test.voc", 20, test_file)
        self.assertTrue(tf.gfile.Exists(os.path.join(data_dir, "test.voc")))
        self.assertIsNotNone(vocab_after)
        self.assertEqual(vocab_before.dump(), vocab_after.dump())
# Ejemplo n.º 3
# 0
 def generator(self, data_dir, tmp_dir, train):
     """Yield WMT En-De token examples, sharing one vocab across splits.

     Args:
         data_dir: directory where the vocab file is stored or created.
         tmp_dir: unused here; kept for the standard generator signature.
         train: if True, read the tokenized training files, else the dev files.

     Returns:
         The generator produced by token_generator over the chosen split.
     """
     # Vocab is always built from the training files so train/dev share it.
     symbolizer_vocab = generator_utils.get_or_generate_txt_vocab(
         data_dir,
         self.vocab_file,
         self.targeted_vocab_size,
         filepatterns=[ENDE_TRAIN_TOK_SRC, ENDE_TRAIN_TOK_TRG])
     if train:
         # BUG FIX: was `ENDE_TRAIN_TOK_SR`, an undefined name (NameError on
         # the train path); the source-file constant is ENDE_TRAIN_TOK_SRC,
         # matching its use in the vocab filepatterns above.
         data_src = ENDE_TRAIN_TOK_SRC
         data_trg = ENDE_TRAIN_TOK_TRG
     else:
         data_src = ENDE_DEV_TOK_SRC
         data_trg = ENDE_DEV_TOK_TRG
     return token_generator(data_src, data_trg, symbolizer_vocab, EOS)
    def generator(self, data_dir, tmp_dir, train):
        """Yield bi-vocab token examples for the En-Kr subtitle datasets."""
        if train:
            dataset_specs = _ENKR_SUBTITLE_TRAIN_DATASETS
        else:
            dataset_specs = _ENKR_SUBTITLE_TEST_DATASETS

        # Each spec's second element holds (source_filename, target_filename)
        # — presumably relative to data_dir; TODO confirm against the specs.
        src_files = []
        trg_files = []
        for spec in dataset_specs:
            src_files.append(os.path.join(data_dir, spec[1][0]))
            trg_files.append(os.path.join(data_dir, spec[1][1]))

        # Separate vocabularies for the source and target languages.
        source_vocab = generator_utils.get_or_generate_txt_vocab(
            data_dir, self.source_vocab_name, self.targeted_vocab_size,
            src_files)
        target_vocab = generator_utils.get_or_generate_txt_vocab(
            data_dir, self.target_vocab_name, self.targeted_vocab_size,
            trg_files)

        if train:
            tag = "train"
        else:
            tag = "dev"
        data_path = compile_data_from_txt(tmp_dir, dataset_specs,
                                          "zero_shot_enkrch_tok_%s" % tag)

        return translate.bi_vocabs_token_generator(data_path + ".lang1",
                                                   data_path + ".lang2",
                                                   source_vocab, target_vocab,
                                                   EOS)