Ejemplos de Subtokenizer en Python

Lenguaje de programación: Python

Namespace/Package Name: official.nlp.transformer.utils.tokenizer

Método / Función: Subtokenizer

Ejemplos en hotexamples.com: 5

Python Subtokenizer - 5 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de official.nlp.transformer.utils.tokenizer.Subtokenizer, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Ejemplo n.º 1

Mostrar archivo

def evaluate_and_log_bleu(model,
                          params,
                          bleu_source,
                          bleu_ref,
                          vocab_file,
                          distribution_strategy=None):
  """Compute the BLEU scores for the model's translations and log them.

  A Subtokenizer is built from `vocab_file` and passed, together with the
  remaining arguments, to `translate_and_compute_bleu`.

  Args:
    model: Keras model that produces the translations.
    params: Dictionary of translation-related parameters.
    bleu_source: File with the source sentences to translate.
    bleu_ref: File with the reference translations.
    vocab_file: File holding the translation vocabulary.
    distribution_strategy: Platform distribution strategy, used for TPU based
      translation. Defaults to None.

  Returns:
    A `(uncased_score, cased_score)` tuple: the case-insensitive BLEU score
    followed by the case-sensitive one.
  """
  bleu_scores = translate_and_compute_bleu(
      model,
      params,
      tokenizer.Subtokenizer(vocab_file),
      bleu_source,
      bleu_ref,
      distribution_strategy)
  uncased_score, cased_score = bleu_scores

  logging.info("Bleu score (uncased): %s", uncased_score)
  logging.info("Bleu score (cased): %s", cased_score)
  return uncased_score, cased_score

Ejemplo n.º 2

Mostrar archivo

 def _init_subtokenizer(self, vocab_list):
     """Build a Subtokenizer from an in-memory list of subtokens.

     Each entry of `vocab_list` is written on its own line, wrapped in single
     quotes, to a temporary file. That file is then handed to
     `tokenizer.Subtokenizer` with an empty reserved-token list.

     Args:
         vocab_list: Iterable of subtokens to write to the vocab file.

     Returns:
         The `tokenizer.Subtokenizer` created from the temporary vocab file.
     """
     # Only the path is needed. Close the handle immediately so the file
     # descriptor is not leaked and the file can be re-opened by name below.
     # delete=False keeps the file on disk after the handle is closed.
     with tempfile.NamedTemporaryFile(delete=False) as temp_file:
         vocab_path = temp_file.name
     with tf.io.gfile.GFile(vocab_path, "w") as w:
         for subtoken in vocab_list:
             w.write("'%s'\n" % subtoken)
     return tokenizer.Subtokenizer(vocab_path, reserved_tokens=[])

Ejemplo n.º 3

Mostrar archivo

def evaluate_and_log_bleu(estimator, bleu_source, bleu_ref, vocab_file):
    """Compute the BLEU scores via `translate_and_compute_bleu` and log them.

    A Subtokenizer is built from `vocab_file` and passed along with the
    estimator and the source/reference files.

    Returns:
        The `(uncased_score, cased_score)` pair.
    """
    scores = translate_and_compute_bleu(
        estimator,
        tokenizer.Subtokenizer(vocab_file),
        bleu_source,
        bleu_ref)
    uncased_score, cased_score = scores

    tf.logging.info("Bleu score (uncased): %f", uncased_score)
    tf.logging.info("Bleu score (cased): %f", cased_score)
    return uncased_score, cased_score

Ejemplo n.º 4

Mostrar archivo

  def predict(self):
    """Run the model on the evaluation input and translate each output.

    Creates the model in inference mode, loads the latest checkpoint found in
    the model directory via `_load_weights_if_possible`, and feeds every
    predicted output to `translate.translate_from_input`.
    """
    params, flags_obj = self.params, self.flags_obj

    with tf.name_scope("model"):
      model = transformer.create_model(params, is_train=False)
      latest_checkpoint = tf.train.latest_checkpoint(self.flags_obj.model_dir)
      self._load_weights_if_possible(model, latest_checkpoint)
      model.summary()
    subtokenizer = tokenizer.Subtokenizer(flags_obj.vocab_file)

    # Keep only the first element of each dataset pair, limited to
    # _SINGLE_SAMPLE entries.
    eval_ds = data_pipeline.eval_input_fn(params)
    eval_ds = eval_ds.map(lambda first, second: first)
    eval_ds = eval_ds.take(_SINGLE_SAMPLE)

    val_outputs, _ = model.predict(eval_ds)
    for idx in range(len(val_outputs)):
      translate.translate_from_input(val_outputs[idx], subtokenizer)

Ejemplo n.º 5

Mostrar archivo

Archivo: translate.py Proyecto: yerman21/modelsTensorFlow

def main(unused_argv):
  """Translate the text and/or file selected through the command-line flags.

  Logs a warning and returns immediately when neither --text nor --file is
  set. Otherwise builds a Subtokenizer and an Estimator, then translates the
  given text and/or file.

  Args:
    unused_argv: Not used.

  Raises:
    ValueError: If --file is set but the file does not exist.
  """
  from official.transformer import transformer_main

  tf.logging.set_verbosity(tf.logging.INFO)

  nothing_requested = FLAGS.text is None and FLAGS.file is None
  if nothing_requested:
    tf.logging.warn("Nothing to translate. Make sure to call this script using "
                    "flags --text or --file.")
    return

  subtokenizer = tokenizer.Subtokenizer(FLAGS.vocab_file)

  # Set up estimator and params
  params = transformer_main.PARAMS_MAP[FLAGS.param_set]
  decode_overrides = (
      ("beam_size", _BEAM_SIZE),
      ("alpha", _ALPHA),
      ("extra_decode_length", _EXTRA_DECODE_LENGTH),
      ("batch_size", _DECODE_BATCH_SIZE),
  )
  for key, value in decode_overrides:
    params[key] = value
  estimator = tf.estimator.Estimator(
      model_fn=transformer_main.model_fn,
      model_dir=FLAGS.model_dir,
      params=params)

  if FLAGS.text is not None:
    tf.logging.info("Translating text: %s" % FLAGS.text)
    translate_text(estimator, subtokenizer, FLAGS.text)

  # Everything below handles --file only.
  if FLAGS.file is None:
    return

  input_file = os.path.abspath(FLAGS.file)
  tf.logging.info("Translating file: %s" % input_file)
  if not tf.gfile.Exists(FLAGS.file):
    raise ValueError("File does not exist: %s" % input_file)

  if FLAGS.file_out is None:
    output_file = None
  else:
    output_file = os.path.abspath(FLAGS.file_out)
    tf.logging.info("File output specified: %s" % output_file)

  translate_file(estimator, subtokenizer, input_file, output_file)