def prediction_graph():
    """Gets features and
    return predicted tokens)
    features: Dict[str:tf.train.features] Contains following features:
              input_k
              seg_id
              input_mask
    """

    features = {
        "input": tf.placeholder(tf.int32, (None, None)),
        "input_mask": tf.placeholder(tf.float32, (None, None))
    }
    batch_size = tf.shape(features['input'])[0]
    input_tensor = features['input']

    # Run the inputs through the model to build the memory (mems);
    # the logits themselves are discarded here.
    input_mask = features['input_mask']
    target_mask = tf.ones((tf.shape(input_tensor)[0], 1))
    _, mems = get_logits(input_tensor,
                         mems=None,
                         input_mask=input_mask,
                         target_mask=target_mask)

    # logits = tf.reshape(logits,(batch_size,1,-1))
    # latest_toks,latest_confs = sample_token(logits)
    # all_confs = latest_confs
    # all_toks = latest_toks

    def symbols_to_logits_fn(toks, _, mems):
        # Only the latest token is fed; earlier context lives in mems.
        toks = toks[:, -1:]
        # The single-token input is always valid.
        input_mask = tf.ones_like(toks, dtype=tf.float32)
        # Every position is a prediction target.
        target_mask = tf.ones((tf.shape(toks)[0], 1), dtype=tf.float32)
        # Beam search keeps the cache batch-major; the model expects its
        # mems time-major, so transpose before the call.
        mems = [tf.transpose(mems[i], [1, 0, 2]) if i < len(mems) - 1
                else tf.transpose(mems[i], [1, 0])
                for i in range(len(mems))]
        logits, mems = get_logits(toks,
                                  mems=mems,
                                  input_mask=input_mask,
                                  target_mask=target_mask)
        # Transpose back to batch-major and return a dict keyed by layer index.
        return logits, {i: tf.transpose(mems[i], [1, 0, 2]) if i < len(mems) - 1
                        else tf.transpose(mems[i], [1, 0])
                        for i in range(len(mems))}

    # Seed decoding with the target-language start id.
    lang_id = ENG_ID if FLAGS.tgt_lang == "english" else HIN_ID
    initial_ids = tf.ones((batch_size,), dtype=tf.int32) * lang_id

    # Convert the model's time-major mems into the batch-major dict layout
    # that sequence_beam_search uses as its initial cache.
    mems = {i: tf.transpose(mems[i], [1, 0, 2]) if i < len(mems) - 1
            else tf.transpose(mems[i], [1, 0])
            for i in range(len(mems))}

    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn, initial_ids, mems, FLAGS.n_token,
        FLAGS.beam_size, FLAGS.beam_alpha, FLAGS.max_decode_length, EOS_ID)
    top_decoded_ids = decoded_ids[:, 0, 1:]
    top_scores = scores[:, 0]
    return (top_decoded_ids, top_scores), features
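
For orientation before the next examples: here is a minimal sketch of the callback contract that sequence_beam_search appears to expect, judging from the calls on this page. The toy function and VOCAB_SIZE below are illustrative assumptions, not code from any of these repos.

import tensorflow as tf

VOCAB_SIZE = 32000  # illustrative value (assumption)

def toy_symbols_to_logits_fn(ids, step, cache):
    # ids:   int32 [batch * beam, decoded_length] -- everything decoded so far
    # step:  scalar decode step counter (unused in this toy)
    # cache: dict of tensors threaded through the search; returned unchanged
    batch = tf.shape(ids)[0]
    logits = tf.zeros([batch, VOCAB_SIZE])  # uniform scores over the vocab
    return logits, cache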
Example #2
    def predict(self, encoder_outputs, encoder_decoder_attention_bias,
                training):
        """Return predicted sequence."""
        encoder_outputs = tf.cast(encoder_outputs, self.params['dtype'])
        if self.params['padded_decode']:
            batch_size = encoder_outputs.shape.as_list()[0]
            input_length = encoder_outputs.shape.as_list()[1]
        else:
            batch_size = tf.shape(encoder_outputs)[0]
            input_length = tf.shape(encoder_outputs)[1]
        max_decode_length = input_length + self.params['extra_decode_length']
        encoder_decoder_attention_bias = tf.cast(
            encoder_decoder_attention_bias, self.params['dtype'])

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(
            max_decode_length, training)
        initial_ids = tf.zeros([batch_size], tf.int32)

        init_decode_length = (max_decode_length
                              if self.params['padded_decode'] else 0)
        num_heads = self.params['num_heads']
        dim_per_head = self.params['hidden_size'] // num_heads
        cache = {
            "layer_%d" % layer: {
                "k":
                tf.zeros(
                    [batch_size, init_decode_length, num_heads, dim_per_head],
                    dtype=self.params["dtype"]),
                "v":
                tf.zeros(
                    [batch_size, init_decode_length, num_heads, dim_per_head],
                    dtype=self.params["dtype"])
            }
            for layer in range(self.params["num_hidden_layers"])
        }

        # Add encoder output and attention bias to the cache.
        cache["encoder_outputs"] = encoder_outputs
        cache["encoder_decoder_attention_bias"] = (
            encoder_decoder_attention_bias)

        # Use beam search to find the top beam_size sequences and scores.
        decoded_ids, scores = sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.params["vocab_size"],
            beam_size=self.params["beam_size"],
            alpha=self.params["alpha"],
            max_decode_length=max_decode_length,
            eos_id=EOS_ID,
            padded_decode=self.params["padded_decode"],
            dtype=self.params["dtype"])

        # Get the top sequence for each batch element
        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]

        return {'outputs': top_decoded_ids, 'scores': top_scores}
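
The zero-length time dimension in the cache above (init_decode_length is 0 whenever padded_decode is off) exists so each decode step can append its keys and values. Below is a toy sketch of that growth pattern; append_step is a hypothetical helper, not the repo's attention code.

import tensorflow as tf

batch_size, num_heads, dim_per_head = 2, 4, 8
cache = {
    "k": tf.zeros([batch_size, 0, num_heads, dim_per_head]),
    "v": tf.zeros([batch_size, 0, num_heads, dim_per_head]),
}

def append_step(cache, new_k, new_v):
    # Each step concatenates its keys/values along the time axis (axis 1).
    cache["k"] = tf.concat([cache["k"], new_k], axis=1)
    cache["v"] = tf.concat([cache["v"], new_v], axis=1)
    return cache

cache = append_step(cache,
                    tf.random.normal([batch_size, 1, num_heads, dim_per_head]),
                    tf.random.normal([batch_size, 1, num_heads, dim_per_head]))
print(cache["k"].shape)  # (2, 1, 4, 8)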
Example #3
    def predict(self, encoder_outputs, encoder_decoder_attention_bias):
        """Return predicted sequence."""
        batch_size = tf.shape(encoder_outputs)[0]
        input_length = tf.shape(encoder_outputs)[1]
        max_decode_length = input_length + self.params["extra_decode_length"]

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(
            max_decode_length)

        # Create initial set of IDs that will be passed into symbols_to_logits_fn.
        initial_ids = tf.zeros([batch_size], dtype=tf.int32)

        # Create cache storing decoder attention values for each layer.
        cache = {
            "layer_%d" % layer: {
                "k": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
                "v": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
            }
            for layer in range(self.params["num_hidden_layers"])
        }

        # Add encoder output and attention bias to the cache.
        cache["encoder_outputs"] = encoder_outputs
        cache["encoder_decoder_attention_bias"] = (
            encoder_decoder_attention_bias)

        # Use beam search to find the top beam_size sequences and scores.
        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.params["vocab_size"],
            beam_size=self.params["beam_size"],
            alpha=self.params["alpha"],
            max_decode_length=max_decode_length,
            eos_id=self.EOS_ID)

        # Get the top sequence for each batch element
        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]

        return {"outputs": top_decoded_ids, "scores": top_scores}
Example #4
    def predict(self, encoder_outputs, encoder_decoder_attention_bias, training):
        """Return predicted sequence."""
        encoder_outputs = tf.cast(encoder_outputs, self.params['dtype'])
        batch_size = tf.shape(encoder_outputs)[0]
        input_length = tf.shape(encoder_outputs)[1]
        max_decode_length = input_length + self.params['extra_decode_length']

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length, training)

        # Create initial set of IDs that will be passed into symbols_to_logits_fn.
        initial_ids = tf.zeros([batch_size], dtype=tf.int32)
        cache = {
            'layer_{}'.format(layer): {
                'k': tf.zeros([batch_size, 0, self.params['hidden_size']], dtype=tf.float32),
                'v': tf.zeros([batch_size, 0, self.params['hidden_size']], dtype=tf.float32)
            } for layer in range(self.params['num_hidden_layers'])
        }
        cache['encoder_outputs'] = encoder_outputs
        cache['encoder_decoder_attention_bias'] = encoder_decoder_attention_bias

        # Use beam search to find the top beam_size sequences and scores.
        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.params["target_vocab_size"],
            beam_size=self.params["beam_size"],
            alpha=self.params["alpha"],
            max_decode_length=max_decode_length,
            eos_id=EOS_ID)

        # Get the top sequence for each batch element
        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]

        return {'outputs': top_decoded_ids, 'scores': top_scores}
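
For reference, everything this variant reads from self.params can be collected into a single dict. The values below are illustrative assumptions, not any repo's actual configuration:

import tensorflow as tf

params = {
    "dtype": tf.float32,          # compute/cache dtype
    "extra_decode_length": 50,    # decoding budget beyond the input length
    "hidden_size": 512,           # decoder width; also the cache depth
    "num_hidden_layers": 6,       # one k/v cache entry per layer
    "target_vocab_size": 32000,   # size of the logits' last dimension
    "beam_size": 4,
    "alpha": 0.6,                 # length-normalization exponent
}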
Example #5
  def predict(self, encoder_outputs, encoder_decoder_attention_bias, training):
    """Return predicted sequence."""
    encoder_outputs = tf.cast(encoder_outputs, self.params["dtype"])
    if self.params["padded_decode"]:
      batch_size = encoder_outputs.shape.as_list()[0]
      input_length = encoder_outputs.shape.as_list()[1]
    else:
      batch_size = tf.shape(encoder_outputs)[0]
      input_length = tf.shape(encoder_outputs)[1]
    max_decode_length = input_length + self.params["extra_decode_length"]
    encoder_decoder_attention_bias = tf.cast(encoder_decoder_attention_bias,
                                             self.params["dtype"])

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(
        max_decode_length, training)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    # pylint: disable=g-complex-comprehension
    init_decode_length = (
        max_decode_length if self.params["padded_decode"] else 0)
    num_heads = self.params["num_heads"]
    dim_per_head = self.params["hidden_size"] // num_heads
    cache = {
        "layer_%d" % layer: {
            "k":
                tf.zeros([
                    batch_size, init_decode_length, num_heads, dim_per_head
                ],
                         dtype=self.params["dtype"]),
            "v":
                tf.zeros([
                    batch_size, init_decode_length, num_heads, dim_per_head
                ],
                         dtype=self.params["dtype"])
        } for layer in range(self.params["num_hidden_layers"])
    }
    # pylint: enable=g-complex-comprehension

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Use beam search to find the top beam_size sequences and scores.
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params["vocab_size"],
        beam_size=self.params["beam_size"],
        alpha=self.params["alpha"],
        max_decode_length=max_decode_length,
        eos_id=EOS_ID,
        padded_decode=self.params["padded_decode"],
        dtype=self.params["dtype"])

    # Get the top sequence for each batch element
    top_decoded_ids = decoded_ids[:, 0, 1:]
    top_scores = scores[:, 0]

    return {"outputs": top_decoded_ids, "scores": top_scores}