def decode_interactively(estimator, hparams, decode_hp, message, response,
                         checkpoint_path=None):
  """Interactive decoding."""

  def input_fn():
    gen_fn = make_input_fn_from_generator(
        _interactive_input_fn(hparams, decode_hp, message))
    example = gen_fn()
    example = decoding._interactive_input_tensor_to_features_dict(
        example, hparams)
    return example

  result_iter = estimator.predict(input_fn, checkpoint_path=checkpoint_path)
  for result in result_iter:
    is_image = False  # TODO(lukaszkaiser): find out from problem id / class.
    targets_vocab = hparams.problem_hparams.vocabulary["targets"]

    if decode_hp.return_beams:
      beams = np.split(result["outputs"], decode_hp.beam_size, axis=0)
      beam_string = targets_vocab.decode(
          decoding._save_until_eos(beams[0], is_image))
      response.put(beam_string, block=False)
    else:
      if decode_hp.identity_output:
        response.put(" ".join(map(str, result["outputs"].flatten())),
                     block=False)
      else:
        response.put(targets_vocab.decode(
            decoding._save_until_eos(result["outputs"], is_image)),
                     block=False)
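# Hedged usage sketch for decode_interactively: `message` and `response` are
# assumed to be queue-like objects (put/get), matching the
# `.put(..., block=False)` calls above and the `_interactive_input_fn(hparams,
# decode_hp, message)` reader.  `estimator`, `hparams` and `decode_hp` are
# assumed to come from the usual trainer setup and are not constructed here.
import queue
import threading

message, response = queue.Queue(), queue.Queue()
threading.Thread(
    target=decode_interactively,
    args=(estimator, hparams, decode_hp, message, response),
    daemon=True).start()
message.put("some input sentence")
print(response.get())  # decoded output for the sentence above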
def generate_encoded_samples(self, data_dir, tmp_dir, input_file):
  """Encode text samples from generate_samples with the subword vocabulary."""
  generator = self.generate_samples(data_dir, tmp_dir, input_file)
  encoder = text_encoder.SubwordTextEncoder(self.vocab_filename)
  for sample in generator:
    if self.has_inputs:
      if FLAGS.max_seq_len > 0:
        sample["inputs"] = encoder.encode(sample["inputs"])[:FLAGS.max_seq_len]
      else:
        sample["inputs"] = encoder.encode(sample["inputs"])
      sample["inputs"].append(text_encoder.EOS_ID)
    if "targets" in sample:
      if FLAGS.max_seq_len > 0:
        sample["targets"] = encoder.encode(
            sample["targets"])[:FLAGS.max_seq_len]
      else:
        sample["targets"] = encoder.encode(sample["targets"])
      sample["targets"].append(text_encoder.EOS_ID)
    if "inputs" in sample:
      sample["inputs_d"] = encoder.decode(
          decoding._save_until_eos(
              np.array(sample["inputs"], dtype=np.int32), False))
    if "targets" in sample:
      sample["targets_d"] = encoder.decode(
          decoding._save_until_eos(
              np.array(sample["targets"], dtype=np.int32), False))
    yield sample
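# The generate_samples generator consumed above is defined elsewhere in this
# problem class.  A minimal sketch of the shape of data it must yield,
# assuming a tab-separated "input<TAB>target" file (the file layout and the
# sketch's name are illustrative, not the project's actual format):
def generate_samples_sketch(input_file):
  with open(input_file) as f:
    for line in f:
      parts = line.rstrip("\n").split("\t")
      if len(parts) == 2:
        yield {"inputs": parts[0], "targets": parts[1]}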
def __decode_from_file(self, filename):
  """Compute predictions on entries in filename and write them out."""
  if not self.decode_hp.batch_size:
    self.decode_hp.batch_size = 32
    tf.logging.info("decode_hp.batch_size not specified; default=%d" %
                    self.decode_hp.batch_size)

  p_hparams = self.hparams.problem_hparams
  inputs_vocab = p_hparams.vocabulary["inputs"]
  targets_vocab = p_hparams.vocabulary["targets"]
  problem_name = "grapheme_to_phoneme_problem"
  tf.logging.info("Performing decoding from a file.")
  inputs = _get_inputs(filename)
  num_decode_batches = (len(inputs) - 1) // self.decode_hp.batch_size + 1

  def input_fn():
    """Function for inputs generator."""
    input_gen = _decode_batch_input_fn(
        num_decode_batches, inputs, inputs_vocab,
        self.decode_hp.batch_size, self.decode_hp.max_input_size)
    gen_fn = decoding.make_input_fn_from_generator(input_gen)
    example = gen_fn()
    return decoding._decode_input_tensor_to_features_dict(example,
                                                          self.hparams)

  decodes = []
  result_iter = self.estimator.predict(input_fn)
  try:
    for result in result_iter:
      if self.decode_hp.return_beams:
        decoded_inputs = inputs_vocab.decode(
            decoding._save_until_eos(result["inputs"], False))
        beam_decodes = []
        output_beams = np.split(result["outputs"], self.decode_hp.beam_size,
                                axis=0)
        for k, beam in enumerate(output_beams):
          decoded_outputs = targets_vocab.decode(
              decoding._save_until_eos(beam, False))
          beam_decodes.append(decoded_outputs)
        decodes.append(beam_decodes)
      else:
        decoded_inputs = inputs_vocab.decode(
            decoding._save_until_eos(result["inputs"], False))
        decoded_outputs = targets_vocab.decode(
            decoding._save_until_eos(result["outputs"], False))
        decodes.append(decoded_outputs)
  except Exception:
    raise ValueError("Invalid model in {}".format(self.params.model_dir))
  return [inputs, decodes]
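# _get_inputs and _decode_batch_input_fn are helpers defined elsewhere in this
# project; the method above only relies on _get_inputs returning a list with
# one word per line of `filename`.  A hedged sketch under that assumption (the
# real helper may parse dictionary lines differently):
def get_inputs_sketch(filename):
  inputs = []
  with open(filename) as f:
    for line in f:
      items = line.split()
      if items:
        inputs.append(items[0])
  return inputs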
def decode_word(self, word):
  """Decode word.

  Args:
    word: word for decoding.

  Returns:
    pronunciation: a decoded phonemes sequence for input word.
  """
  num_samples = 1
  decode_length = 100
  vocabulary = self.problem.source_vocab
  # This should be longer than the longest input.
  const_array_size = 50

  input_ids = vocabulary.encode(word)
  input_ids.append(text_encoder.EOS_ID)
  self.inputs = [num_samples, decode_length, len(input_ids)] + input_ids
  assert len(self.inputs) < const_array_size
  self.inputs += [0] * (const_array_size - len(self.inputs))

  if self.first_ex:
    return

  res_iter = self.estimator.predict(self.input_fn)
  result = next(res_iter)
  pronunciations = []
  if self.decode_hp.return_beams:
    beams = np.split(result["outputs"], self.decode_hp.beam_size, axis=0)
    for k, beam in enumerate(beams):
      tf.logging.info("BEAM %d:" % k)
      beam_string = self.problem.target_vocab.decode(
          decoding._save_until_eos(beam, is_image=False))
      pronunciations.append(beam_string)
      tf.logging.info(beam_string)
  else:
    if self.decode_hp.identity_output:
      tf.logging.info(" ".join(map(str, result["outputs"].flatten())))
    else:
      res = result["outputs"].flatten()
      if text_encoder.EOS_ID in res:
        index = list(res).index(text_encoder.EOS_ID)
        res = res[0:index]
      pronunciations.append(self.problem.target_vocab.decode(res))
  return pronunciations
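# Worked example of the packed input layout built above, assuming
# const_array_size = 50 and a word whose encoded ids are [23, 7, 11]
# (ids are made up for illustration; EOS_ID == 1 has been appended):
#   [num_samples, decode_length, len(input_ids), id_1, ..., id_n, 0-padding]
input_ids = [23, 7, 11, 1]
packed = [1, 100, len(input_ids)] + input_ids
packed += [0] * (50 - len(packed))
print(packed[:10])  # [1, 100, 4, 23, 7, 11, 1, 0, 0, 0]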
def decode_word(self, word):
  """Decode word.

  Args:
    word: word for decoding.

  Returns:
    pronunciation: a decoded phonemes sequence for input word.
  """
  num_samples = 1
  decode_length = 100
  vocabulary = self.problem.source_vocab
  # This should be longer than the longest input.
  const_array_size = 50

  input_ids = vocabulary.encode(word)
  input_ids.append(text_encoder.EOS_ID)
  self.inputs = [num_samples, decode_length, len(input_ids)] + input_ids
  assert len(self.inputs) < const_array_size
  self.inputs += [0] * (const_array_size - len(self.inputs))

  result = next(self.res_iter)
  pronunciations = []
  if self.decode_hp.return_beams:
    beams = np.split(result["outputs"], self.decode_hp.beam_size, axis=0)
    for k, beam in enumerate(beams):
      tf.logging.info("BEAM %d:" % k)
      beam_string = self.problem.target_vocab.decode(
          decoding._save_until_eos(beam, is_image=False))
      pronunciations.append(beam_string)
      tf.logging.info(beam_string)
  else:
    if self.decode_hp.identity_output:
      tf.logging.info(" ".join(map(str, result["outputs"].flatten())))
    else:
      res = result["outputs"].flatten()
      if text_encoder.EOS_ID in res:
        index = list(res).index(text_encoder.EOS_ID)
        res = res[0:index]
      pronunciations.append(self.problem.target_vocab.decode(res))
  return pronunciations
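# decoding._save_until_eos comes from tensor2tensor.utils.decoding.  As it is
# used throughout this file (is_image=False), it effectively flattens the id
# array and truncates it at the first EOS token.  A minimal stand-alone sketch:
import numpy as np

EOS_ID = 1  # text_encoder.EOS_ID

def save_until_eos_sketch(ids):
  """Flatten `ids` and keep everything before the first EOS."""
  ids = ids.flatten()
  eos_positions = np.where(ids == EOS_ID)[0]
  if eos_positions.size:
    return ids[:eos_positions[0]]
  return ids

print(save_until_eos_sketch(np.array([[5, 8, 3, 1, 0, 0]])))  # -> [5 8 3]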
def process(self, query):
  """Returns the visualizations for query.

  Args:
    query: The query to process.

  Returns:
    A dictionary of results with processing and graph visualizations.
  """
  tf.logging.info("Processing new query [%s]" % query)

  # Create the new TFDBG hook directory.
  hook_dir = "/tmp/t2t_server_dump/request_%d" % int(time.time())
  os.makedirs(hook_dir)
  hooks = [tfdbg.DumpingDebugHook(hook_dir, watch_fn=topk_watch_fn)]

  # TODO(kstevens): This is extremely hacky and slow for responding to
  # queries.  Figure out a reasonable way to pre-load the model weights before
  # forking and run queries through the estimator quickly.
  def server_input_fn():
    """Generator that returns just the current query."""
    for _ in range(1):
      input_ids = self.source_vocab.encode(query)
      input_ids.append(text_encoder.EOS_ID)
      x = [1, 100, len(input_ids)] + input_ids
      x += [0] * (self.const_array_size - len(x))
      d = {
          "inputs": np.array(x).astype(np.int32),
          "problem_choice": np.array(0).astype(np.int32)
      }
      yield d

  def input_fn():
    """Generator that returns just the current query."""
    gen_fn = decoding.make_input_fn_from_generator(server_input_fn())
    example = gen_fn()
    # TODO(kstevens): Make this method public
    # pylint: disable=protected-access
    return decoding._interactive_input_tensor_to_features_dict(
        example, self.hparams)

  # Make the prediction for the current query.
  result_iter = self.estimator.predict(input_fn, hooks=hooks)
  result = None
  for result in result_iter:
    break

  # Extract the beam search information by reading the dumped TFDBG event
  # tensors.  We first read and record the per step beam sequences, then record
  # the beam scores.  Afterwards we align the two sets of values to create the
  # full graph vertices and edges.
  decoding_graph = graph.Graph()
  run_dirs = sorted(glob.glob(os.path.join(hook_dir, "run_*")))
  for run_dir in run_dirs:
    # Record the different completed and active beam sequence ids.
    alive_sequences = deque()
    finished_sequences = deque()

    # Make the root vertex since it always needs to exist.
    decoding_graph.get_vertex(sequence_key([0]))

    # Create the initial vertices and edges for the active and finished
    # sequences.  We uniquely define each vertex using its full sequence path
    # as a string to ensure there are no collisions when the same step has two
    # instances of an output id.
    dump_dir = tfdbg.DebugDumpDir(run_dir, validate=False)
    seq_datums = dump_dir.find(predicate=seq_filter)
    for seq_datum in seq_datums:
      sequences = np.array(seq_datum.get_tensor()).astype(int)[0]
      if "alive" in seq_datum.node_name:
        alive_sequences.append(sequences)
      if "finished" in seq_datum.node_name:
        finished_sequences.append(sequences)

      for sequence in sequences:
        pieces = self.targets_vocab.decode_list(sequence)
        index = sequence[-1]
        if index == 0:
          continue

        parent = decoding_graph.get_vertex(sequence_key(sequence[:-1]))
        current = decoding_graph.get_vertex(sequence_key(sequence))

        edge = decoding_graph.add_edge(parent, current)
        edge.data["label"] = pieces[-1]
        edge.data["label_id"] = index
        # Coerce the type to be a python bool.  Numpy bools can't be easily
        # converted to JSON.
        edge.data["completed"] = bool(index == 1)

    # Examine the score results and store the scores with the associated edges
    # in the graph.  We fetch the vertices (and relevant edges) by looking
    # into the saved beam sequences stored above.
    score_datums = dump_dir.find(predicate=scores_filter)
    for score_datum in score_datums:
      if "alive" in score_datum.node_name:
        sequences = alive_sequences.popleft()

      if "finished" in score_datum.node_name:
        sequences = finished_sequences.popleft()

      scores = np.array(score_datum.get_tensor()).astype(float)[0]
      for i, score in enumerate(scores):
        sequence = sequences[i]
        if sequence[-1] == 0:
          continue

        vertex = decoding_graph.get_vertex(sequence_key(sequence))
        edge = decoding_graph.edges[vertex.in_edges[0]]
        edge.data["score"] = score
        edge.data["log_probability"] = score
        edge.data["total_log_probability"] = score

  # Delete the hook dir to save disk space.
  shutil.rmtree(hook_dir)

  # Create the graph visualization data structure.
  graph_vis = {
      "visualization_name": "graph",
      "title": "Graph",
      "name": "graph",
      "search_graph": decoding_graph.to_dict(),
  }

  # Create the processing visualization data structure.
  # TODO(kstevens): Make this method public
  # pylint: disable=protected-access
  output_ids = decoding._save_until_eos(result["outputs"].flatten(), False)
  output_pieces = self.targets_vocab.decode_list(output_ids)
  output_token = [{"text": piece} for piece in output_pieces]
  output = self.targets_vocab.decode(output_ids)

  source_steps = [{
      "step_name": "Initial",
      "segment": [{
          "text": query
      }],
  }]

  target_steps = [{
      "step_name": "Initial",
      "segment": output_token,
  }, {
      "step_name": "Final",
      "segment": [{
          "text": output
      }],
  }]

  processing_vis = {
      "visualization_name": "processing",
      "title": "Processing",
      "name": "processing",
      "query_processing": {
          "source_processing": source_steps,
          "target_processing": target_steps,
      },
  }

  return {
      "result": [processing_vis, graph_vis],
  }
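# `sequence_key` is a helper defined elsewhere in this module; the code above
# only requires that it map a beam id sequence to a stable, unique string key
# for graph vertices.  A plausible sketch under that assumption:
def sequence_key_sketch(sequence):
  """Turn an id sequence (list or 1-D array) into a unique vertex key."""
  return "_".join(str(int(i)) for i in sequence)

# e.g. sequence_key_sketch([0, 17, 42]) -> "0_17_42"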
def decode_from_dataset(estimator,
                        problem_name,
                        hparams,
                        decode_hp,
                        decode_to_file=None,
                        dataset_split=None):
  """Perform decoding from dataset."""
  tf.logging.info("Performing local inference from dataset for %s.",
                  str(problem_name))

  shard = decode_hp.shard_id if decode_hp.shards > 1 else None
  output_dir = os.path.join(estimator.model_dir, "decode")
  tf.gfile.MakeDirs(output_dir)

  if decode_hp.batch_size:
    hparams.batch_size = decode_hp.batch_size
    hparams.use_fixed_batch_size = True

  dataset_kwargs = {
      "shard": shard,
      "dataset_split": dataset_split,
      "max_records": decode_hp.num_samples
  }

  problem = hparams.problem
  infer_input_fn = problem.make_estimator_input_fn(
      tf.estimator.ModeKeys.PREDICT, hparams, dataset_kwargs=dataset_kwargs)

  predictions = estimator.predict(infer_input_fn)

  decode_to_file = decode_to_file or decode_hp.decode_to_file
  if decode_to_file:
    if decode_hp.shards > 1:
      decode_filename = decode_to_file + ("%.2d" % decode_hp.shard_id)
    else:
      decode_filename = decode_to_file
    output_filepath = decoding._decode_filename(decode_filename, problem_name,
                                                decode_hp)
    parts = output_filepath.split(".")
    parts[-1] = "targets"
    target_filepath = ".".join(parts)
    parts[-1] = "inputs"
    input_filepath = ".".join(parts)
    parts[-1] = "enc_state"
    encoder_state_file_path = ".".join(parts)

    input_file = tf.gfile.Open(input_filepath, "w")

  problem_hparams = hparams.problem_hparams
  has_input = "inputs" in problem_hparams.vocabulary
  inputs_vocab_key = "inputs" if has_input else "targets"
  inputs_vocab = problem_hparams.vocabulary[inputs_vocab_key]

  ##### Modified #####
  # Collect decoded inputs and the corresponding encoder outputs.
  encoder_outputs = []
  decoded_inputs = []
  for num_predictions, prediction in enumerate(predictions):
    num_predictions += 1
    inputs = prediction["inputs"]
    encoder_output = prediction["encoder_outputs"]
    decoded_input = inputs_vocab.decode(
        decoding._save_until_eos(inputs, False))
    encoder_outputs.append(encoder_output)
    decoded_inputs.append(decoded_input)

    if 0 <= decode_hp.num_samples <= num_predictions:
      break

  ##### Modified #####
  # Write the decoded inputs and the encoder_outputs list to file.
  if decode_to_file:
    for i, (e_output, d_input) in \
        enumerate(zip(encoder_outputs, decoded_inputs)):
      input_file.write("{}:\t{}".format(
          i, str(d_input) + decode_hp.delimiter))
    np.save(encoder_state_file_path, np.array(encoder_outputs))
    input_file.close()

  decoding.run_postdecode_hooks(
      decoding.DecodeHookArgs(
          estimator=estimator,
          problem=problem,
          output_dir=output_dir,
          hparams=hparams,
          decode_hparams=decode_hp))

  tf.logging.info("Completed inference on %d samples." % num_predictions)  # pylint: disable=undefined-loop-variable
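# Worked example of the companion-file naming above: the last dot-separated
# component of the decode output path is swapped out for each artifact
# (the path below is illustrative only).
output_filepath = "decode/translation.beam4.alpha0.6.decodes"
parts = output_filepath.split(".")
parts[-1] = "targets"
print(".".join(parts))   # decode/translation.beam4.alpha0.6.targets
parts[-1] = "inputs"
print(".".join(parts))   # decode/translation.beam4.alpha0.6.inputs
parts[-1] = "enc_state"
print(".".join(parts))   # decode/translation.beam4.alpha0.6.enc_state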
def decode_from_file(estimator,
                     filename,
                     hparams,
                     decode_hp,
                     decode_to_file=None,
                     checkpoint_path=None):
  """Compute predictions on entries in filename and write them out."""
  if not decode_hp.batch_size:
    decode_hp.batch_size = 32
    tf.logging.info("decode_hp.batch_size not specified; default=%d" %
                    decode_hp.batch_size)

  p_hp = hparams.problem_hparams
  has_input = "inputs" in p_hp.vocabulary
  inputs_vocab_key = "inputs" if has_input else "targets"
  inputs_vocab = p_hp.vocabulary[inputs_vocab_key]
  problem_name = FLAGS.problem
  tf.logging.info("Performing decoding from a file.")
  sorted_inputs, sorted_keys = decoding._get_sorted_inputs(
      filename, decode_hp.shards, decode_hp.delimiter)
  num_decode_batches = (len(sorted_inputs) - 1) // decode_hp.batch_size + 1

  def input_fn():
    input_gen = decoding._decode_batch_input_fn(num_decode_batches,
                                                sorted_inputs, inputs_vocab,
                                                decode_hp.batch_size,
                                                decode_hp.max_input_size)
    gen_fn = decoding.make_input_fn_from_generator(input_gen)
    example = gen_fn()
    return decoding._decode_input_tensor_to_features_dict(example, hparams)

  ##### Modified #####
  # Collect decoded inputs and the corresponding encoder outputs.
  decoded_inputs = []
  encoder_outputs = []
  result_iter = estimator.predict(input_fn, checkpoint_path=checkpoint_path)

  start_time = time.time()
  total_time_per_step = 0
  total_cnt = 0

  def timer(gen):
    while True:
      try:
        start_time = time.time()
        item = next(gen)
        elapsed_time = time.time() - start_time
        yield elapsed_time, item
      except StopIteration:
        break

  for elapsed_time, result in timer(result_iter):
    decoded_input = inputs_vocab.decode(
        decoding._save_until_eos(result["inputs"], False))
    decoded_inputs.append(decoded_input)
    encoder_outputs.append(np.array(result["encoder_outputs"]))
    total_time_per_step += elapsed_time
    total_cnt += result["outputs"].shape[-1]
  tf.logging.info("Elapsed Time: %5.5f" % (time.time() - start_time))
  tf.logging.info("Averaged Single Token Generation Time: %5.7f" %
                  (total_time_per_step / total_cnt))

  decoded_inputs.reverse()
  encoder_outputs.reverse()

  decode_filename = decode_to_file if decode_to_file else filename
  if decode_hp.shards > 1:
    decode_filename += "%.2d" % decode_hp.shard_id
  if not decode_to_file:
    decode_filename = decoding._decode_filename(decode_filename, problem_name,
                                                decode_hp)
  base = os.path.basename(decode_filename).split('.')
  dirname = os.path.dirname(decode_filename)
  encode_filename = os.path.join(dirname, '{}{}'.format(base[0], '.npy'))

  tf.logging.info("Writing inputs into %s" % decode_filename)
  tf.logging.info("Writing encoder outputs into %s" % encode_filename)
  outfile = tf.gfile.Open(decode_filename, "w")

  ##### Modified #####
  # Write the cleaned decoded inputs and the encoder_outputs list to file.
  if decode_to_file:
    for i, (e_output, d_input) in \
        enumerate(zip(encoder_outputs, decoded_inputs)):
      outfile.write("{}".format(' '.join([
          word for word in str(d_input).strip().split()
          if word.strip() != '' and word.strip() != '<unk>'
      ]) + decode_hp.delimiter))
    np.save(encode_filename, np.array(encoder_outputs))

  if decode_to_file:
    outfile.close()
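# Hypothetical follow-up for the .npy file written above: encode_filename is
# "<basename of decode_filename>.npy" in the same directory (the path below is
# illustrative).  If the per-sentence encoder states have different lengths,
# they are stored as an object array, so np.load needs allow_pickle=True.
import numpy as np

states = np.load("newstest2014.npy", allow_pickle=True)
print(len(states), states[0].shape)  # one entry per input, in the order written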
def __decode_from_file(self, filename, outfile=None):
  """Compute predictions on entries in filename and write them out."""
  if not self.decode_hp.batch_size:
    self.decode_hp.batch_size = 32
    tf.logging.info("decode_hp.batch_size not specified; default=%d" %
                    self.decode_hp.batch_size)

  p_hparams = self.hparams.problem_hparams
  inputs_vocab = p_hparams.vocabulary["inputs"]
  targets_vocab = p_hparams.vocabulary["targets"]
  problem_name = "grapheme_to_phoneme_problem"
  tf.logging.info("Performing decoding from a file.")
  inputs = _get_inputs(filename)
  num_decode_batches = (len(inputs) - 1) // self.decode_hp.batch_size + 1

  def input_fn():
    """Function for inputs generator."""
    input_gen = _decode_batch_input_fn(
        num_decode_batches, inputs, inputs_vocab,
        self.decode_hp.batch_size, self.decode_hp.max_input_size)
    gen_fn = decoding.make_input_fn_from_generator(input_gen)
    example = gen_fn()
    return decoding._decode_input_tensor_to_features_dict(example,
                                                          self.hparams)

  decodes = []
  result_iter = self.estimator.predict(input_fn)
  try:
    for result in result_iter:
      if self.decode_hp.return_beams:
        decoded_inputs = inputs_vocab.decode(
            decoding._save_until_eos(result["inputs"], False))
        beam_decodes = []
        output_beams = np.split(result["outputs"], self.decode_hp.beam_size,
                                axis=0)
        for k, beam in enumerate(output_beams):
          decoded_outputs = targets_vocab.decode(
              decoding._save_until_eos(beam, False))
          beam_decodes.append(decoded_outputs)
          if outfile:
            outfile.write("%s %s%s" % (decoded_inputs, decoded_outputs,
                                       self.decode_hp.delimiter))
          else:
            print("%s %s%s" % (decoded_inputs, decoded_outputs,
                               self.decode_hp.delimiter))
        decodes.append(beam_decodes)
      else:
        decoded_inputs = inputs_vocab.decode(
            decoding._save_until_eos(result["inputs"], False))
        decoded_outputs = targets_vocab.decode(
            decoding._save_until_eos(result["outputs"], False))
        if outfile:
          outfile.write("%s %s%s" % (decoded_inputs, decoded_outputs,
                                     self.decode_hp.delimiter))
        else:
          print("%s %s%s" % (decoded_inputs, decoded_outputs,
                             self.decode_hp.delimiter))
        decodes.append(decoded_outputs)
  except Exception:
    raise ValueError("Invalid model in {}".format(self.params.model_dir))
  return [inputs, decodes]
def process(self, query):
  """Returns the visualizations for query.

  Args:
    query: The query to process.

  Returns:
    A dictionary of results with processing and graph visualizations.
  """
  tf.logging.info("Processing new query [%s]" % query)

  # Create the new TFDBG hook directory.
  hook_dir = "/tmp/t2t_server_dump/request_%d" % int(time.time())
  os.makedirs(hook_dir)
  hooks = [tfdbg.DumpingDebugHook(hook_dir, watch_fn=topk_watch_fn)]

  # TODO(kstevens): This is extremely hacky and slow for responding to
  # queries.  Figure out a reasonable way to pre-load the model weights before
  # forking and run queries through the estimator quickly.
  def server_input_fn():
    """Generator that returns just the current query."""
    for _ in range(1):
      input_ids = self.source_vocab.encode(query)
      input_ids.append(text_encoder.EOS_ID)
      x = [1, 100, len(input_ids)] + input_ids
      x += [0] * (self.const_array_size - len(x))
      d = {
          "inputs": np.array(x).astype(np.int32),
      }
      yield d

  def input_fn():
    """Generator that returns just the current query."""
    gen_fn = decoding.make_input_fn_from_generator(server_input_fn())
    example = gen_fn()
    # TODO(kstevens): Make this method public
    # pylint: disable=protected-access
    return decoding._interactive_input_tensor_to_features_dict(
        example, self.hparams)

  # Make the prediction for the current query.
  result_iter = self.estimator.predict(input_fn, hooks=hooks)
  result = None
  for result in result_iter:
    break

  # Extract the beam search information by reading the dumped TFDBG event
  # tensors.  We first read and record the per step beam sequences, then record
  # the beam scores.  Afterwards we align the two sets of values to create the
  # full graph vertices and edges.
  decoding_graph = graph.Graph()
  run_dirs = sorted(glob.glob(os.path.join(hook_dir, "run_*")))
  for run_dir in run_dirs:
    # Record the different completed and active beam sequence ids.
    alive_sequences = deque()
    finished_sequences = deque()

    # Make the root vertex since it always needs to exist.
    decoding_graph.get_vertex(sequence_key([0]))

    # Create the initial vertices and edges for the active and finished
    # sequences.  We uniquely define each vertex using its full sequence path
    # as a string to ensure there are no collisions when the same step has two
    # instances of an output id.
    dump_dir = tfdbg.DebugDumpDir(run_dir, validate=False)
    seq_datums = dump_dir.find(predicate=seq_filter)
    for seq_datum in seq_datums:
      sequences = np.array(seq_datum.get_tensor()).astype(int)[0]
      if "alive" in seq_datum.node_name:
        alive_sequences.append(sequences)
      if "finished" in seq_datum.node_name:
        finished_sequences.append(sequences)

      for sequence in sequences:
        pieces = self.targets_vocab.decode_list(sequence)
        index = sequence[-1]
        if index == 0:
          continue

        parent = decoding_graph.get_vertex(sequence_key(sequence[:-1]))
        current = decoding_graph.get_vertex(sequence_key(sequence))

        edge = decoding_graph.add_edge(parent, current)
        edge.data["label"] = pieces[-1]
        edge.data["label_id"] = index
        # Coerce the type to be a python bool.  Numpy bools can't be easily
        # converted to JSON.
        edge.data["completed"] = bool(index == 1)

    # Examine the score results and store the scores with the associated edges
    # in the graph.  We fetch the vertices (and relevant edges) by looking
    # into the saved beam sequences stored above.
    score_datums = dump_dir.find(predicate=scores_filter)
    for score_datum in score_datums:
      if "alive" in score_datum.node_name:
        sequences = alive_sequences.popleft()

      if "finished" in score_datum.node_name:
        sequences = finished_sequences.popleft()

      scores = np.array(score_datum.get_tensor()).astype(float)[0]
      for i, score in enumerate(scores):
        sequence = sequences[i]
        if sequence[-1] == 0:
          continue

        vertex = decoding_graph.get_vertex(sequence_key(sequence))
        edge = decoding_graph.edges[vertex.in_edges[0]]
        edge.data["score"] = score
        edge.data["log_probability"] = score
        edge.data["total_log_probability"] = score

  # Delete the hook dir to save disk space.
  shutil.rmtree(hook_dir)

  # Create the graph visualization data structure.
  graph_vis = {
      "visualization_name": "graph",
      "title": "Graph",
      "name": "graph",
      "search_graph": decoding_graph.to_dict(),
  }

  # Create the processing visualization data structure.
  # TODO(kstevens): Make this method public
  # pylint: disable=protected-access
  output_ids = decoding._save_until_eos(result["outputs"].flatten(), False)
  output_pieces = self.targets_vocab.decode_list(output_ids)
  output_token = [{"text": piece} for piece in output_pieces]
  output = self.targets_vocab.decode(output_ids)

  source_steps = [{
      "step_name": "Initial",
      "segment": [{
          "text": query
      }],
  }]

  target_steps = [{
      "step_name": "Initial",
      "segment": output_token,
  }, {
      "step_name": "Final",
      "segment": [{
          "text": output
      }],
  }]

  processing_vis = {
      "visualization_name": "processing",
      "title": "Processing",
      "name": "processing",
      "query_processing": {
          "source_processing": source_steps,
          "target_processing": target_steps,
      },
  }

  return {
      "result": [processing_vis, graph_vis],
  }