Example 1
  def _compute_logits(self, rnn_out):
    if self._num_layers == 1 and self._weights is not None:
      assert tensor_utils.shape(rnn_out, -1) == self._hidden_dim

    if self._num_layers == 1:
      with tf.variable_scope("mlp1", reuse=self._reuse):
        if self._weights is None:
          scale = (3.0 / self._hidden_dim) ** 0.5
          weight_initializer = tf.random_uniform_initializer(
              minval=-scale, maxval=scale)
          self._linear1 = Linear(
              rnn_out,
              self._output_size,
              True, weights=None,
              weight_initializer=weight_initializer)
        else:
          self._linear1 = Linear(
              rnn_out, self._output_size, True, weights=self._weights)
        logits = self._linear1(rnn_out)
    else:
      # Two-layer MLP head: hidden layer (mlp1) then output projection (mlp2).
      assert self._num_layers == 2
      with tf.variable_scope("mlp1", reuse=self._reuse):
        if self._linear1 is None:
          self._linear1 = Linear(
              rnn_out, self._hidden_dim, True,
              weights=None,
              weight_initializer=tf.contrib.layers.xavier_initializer())
        hidden = self._linear1(rnn_out)
        if self._activation:
          hidden = self._activation(hidden)

        if self._mode == tf.estimator.ModeKeys.TRAIN and self._dropout > 0.:
          hidden = tf.nn.dropout(hidden, keep_prob=1.-self._dropout)

      with tf.variable_scope("mlp2", reuse=self._reuse):
        if self._linear2 is None:
          if self._weights is None:
            scale = (3.0 / self._hidden_dim) ** 0.5
            weight_initializer = tf.random_uniform_initializer(
                minval=-scale, maxval=scale)
            self._linear2 = Linear(
                hidden,
                self._output_size,
                True, weights=None,
                weight_initializer=weight_initializer)
          else:
            self._linear2 = Linear(
                hidden, self._output_size, True, weights=self._weights)

        logits = self._linear2(hidden)
    return logits
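The output layer above samples its weights from U(-a, a) with a = sqrt(3 / hidden_dim), so each weight has variance a**2 / 3 = 1 / hidden_dim (unit fan-in scaling). A minimal standalone sketch of that initializer in TF 1.x; hidden_dim and the output size are illustrative values, and "w_out" is a hypothetical variable name:

import tensorflow as tf

hidden_dim = 512   # illustrative
scale = (3.0 / hidden_dim) ** 0.5   # Var(U(-a, a)) = a**2 / 3 = 1 / hidden_dim
weight_initializer = tf.random_uniform_initializer(minval=-scale, maxval=scale)
w = tf.get_variable("w_out", shape=[hidden_dim, 10000],
                    initializer=weight_initializer)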
Example 2
def beam_decoder(features, mode, vocab, encoder_outputs, hps):
    """Beam search decoder.

  Args:
    features: Dictionary of input Tensors.
    mode: train or eval. Keys from tf.estimator.ModeKeys.
    vocab: A list of strings of words in the vocabulary.
    encoder_outputs: output tensors from the encoder
    hps: Hyperparams.

  Returns:
    Decoder outputs
  """
    assert mode != tf.estimator.ModeKeys.TRAIN, (
        "Beam search is not used in training.")
    embeddings = encoder_outputs.embeddings
    mem_input = encoder_outputs.mem_input
    batch_size = tensor_utils.shape(features["src_len"], 0)

    src_len, src_inputs = features["src_len"], features["src_inputs"]
    src_mask = tf.sequence_mask(src_len, tf.shape(src_inputs)[1])

    if hps.att_neighbor:
        neighbor_len = features["neighbor_len"]
        neighbor_inputs = features["neighbor_inputs"]
        neighbor_mask = tf.sequence_mask(neighbor_len,
                                         tf.shape(neighbor_inputs)[1])
        inputs = tf.concat([src_inputs, neighbor_inputs], 1)
        # Use the full padded length for every example; the real lengths
        # (src_len + neighbor_len) are tracked through `mask` instead.
        mask = tf.concat([src_mask, neighbor_mask], axis=1)
        lens = tf.shape(mask)[1] * tf.ones([batch_size], tf.int32)
    else:
        inputs = features["src_inputs"]
        lens = features["src_len"]
        mask = src_mask

    sparse_inputs = None
    float_mask = tf.cast(mask, dtype=tf.float32)
    if hps.use_copy:
        sparse_inputs = sparse_map(tf.expand_dims(inputs, axis=2),
                                   tf.expand_dims(float_mask, axis=2),
                                   vocab.size())
        sparse_inputs = sparse_tile_batch(sparse_inputs,
                                          multiplier=hps.beam_width)

    tiled_mask = tf.contrib.seq2seq.tile_batch(mask, multiplier=hps.beam_width)
    inputs = tf.contrib.seq2seq.tile_batch(inputs, multiplier=hps.beam_width)
    lens = tf.contrib.seq2seq.tile_batch(lens, multiplier=hps.beam_width)

    def _beam_decode(cell):
        """Beam decode."""
        with tf.variable_scope("beam_decoder"):

            initial_state = cell.zero_state(batch_size=batch_size *
                                            hps.beam_width,
                                            dtype=tf.float32)
            if hps.use_bridge:
                h_state, c_state = encoder_outputs.states
                h_state = tf.contrib.seq2seq.tile_batch(
                    h_state, multiplier=hps.beam_width)
                c_state = tf.contrib.seq2seq.tile_batch(
                    c_state, multiplier=hps.beam_width)
                initial_cell_state = tf.contrib.rnn.LSTMStateTuple(
                    h_state, c_state)
                initial_state = initial_state.clone(
                    cell_state=(initial_cell_state, ))

            decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell=cell,
                embedding=embeddings,
                start_tokens=tf.fill([batch_size],
                                     vocab.word2id(data.START_DECODING)),
                end_token=vocab.word2id(data.STOP_DECODING),
                initial_state=initial_state,
                beam_width=hps.beam_width,
                length_penalty_weight=hps.length_norm,
                coverage_penalty_weight=hps.cp)
        with tf.variable_scope("dynamic_decode", reuse=tf.AUTO_REUSE):
            decoder_outputs, _, decoder_len = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder, maximum_iterations=hps.max_dec_steps)
        return decoder_outputs, decoder_len

    # [batch_size*beam_width, src_len, encoder_dim]
    att_context = tf.contrib.seq2seq.tile_batch(encoder_outputs.att_context,
                                                multiplier=hps.beam_width)
    copy_context = None
    if hps.use_copy:
        copy_context = tf.contrib.seq2seq.tile_batch(
            encoder_outputs.copy_context, multiplier=hps.beam_width)
    with tf.variable_scope("attention", reuse=tf.AUTO_REUSE):
        if hps.att_type == "luong":
            attention = tf.contrib.seq2seq.LuongAttention(
                num_units=hps.decoder_dim,
                memory=att_context,
                memory_sequence_length=lens)
        elif hps.att_type == "bahdanau":
            attention = tf.contrib.seq2seq.BahdanauAttention(
                num_units=hps.decoder_dim,
                memory=att_context,
                memory_sequence_length=lens)
        elif hps.att_type == "hyper":
            attention = HyperAttention(num_units=hps.decoder_dim,
                                       mem_input=mem_input,
                                       hps=hps,
                                       memory=att_context,
                                       use_beam=True,
                                       memory_sequence_length=lens)
        elif hps.att_type == "my":
            attention = MyAttention(num_units=hps.decoder_dim,
                                    memory=att_context,
                                    memory_sequence_length=lens,
                                    mask=tiled_mask)

    with tf.variable_scope("rnn_decoder", reuse=tf.AUTO_REUSE):
        decoder_cell = get_rnn_cell(mode=mode,
                                    hps=hps,
                                    input_dim=hps.decoder_dim + hps.emb_dim,
                                    num_units=hps.decoder_dim,
                                    num_layers=hps.num_decoder_layers,
                                    dropout=hps.decoder_drop,
                                    mem_input=mem_input,
                                    use_beam=True,
                                    cell_type=hps.rnn_cell)
    with tf.variable_scope("attention_wrapper", reuse=tf.AUTO_REUSE):
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cell,
            attention,
            attention_layer_size=hps.decoder_dim,
            alignment_history=hps.use_copy)

    with tf.variable_scope("output_projection", reuse=tf.AUTO_REUSE):
        weights = tf.transpose(embeddings) if hps.tie_embedding else None
        hidden_dim = hps.emb_dim if hps.tie_embedding else hps.decoder_dim
        decoder_cell = OutputWrapper(
            decoder_cell,
            num_layers=hps.num_mlp_layers,
            hidden_dim=hidden_dim,
            output_size=vocab.size() if hps.tie_embedding else hps.output_size,
            weights=weights,
            dropout=hps.out_drop,
            use_copy=hps.use_copy,
            encoder_emb=copy_context,
            sparse_inputs=sparse_inputs,
            mask=tf.cast(tiled_mask, dtype=tf.float32),
            hps=hps,
            mode=mode,
            reuse=tf.AUTO_REUSE)

    decoder_outputs, decoder_len = _beam_decode(decoder_cell)

    return DecoderOutputs(decoder_outputs=decoder_outputs,
                          decoder_len=decoder_len)
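Every memory tensor handed to the beam decoder above (mask, inputs, lens, att_context, the bridge states) goes through tf.contrib.seq2seq.tile_batch, which repeats each batch entry beam_width times in a row so the decoder can treat beams as extra batch rows. A small self-contained sketch of that behavior in TF 1.x, with made-up values:

import tensorflow as tf

x = tf.constant([[1, 2], [3, 4]])                       # [batch=2, time=2]
tiled = tf.contrib.seq2seq.tile_batch(x, multiplier=3)  # [batch*3, time]
with tf.Session() as sess:
    print(sess.run(tiled))
    # [[1 2] [1 2] [1 2] [3 4] [3 4] [3 4]]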
Example 3
def basic_decoder(features, mode, vocab, encoder_outputs, hps):
    """Decoder.

  Args:
    features: Dictionary of input Tensors.
    mode: train or eval. Keys from tf.estimator.ModeKeys.
    vocab: A list of strings of words in the vocabulary.
    encoder_outputs: output tensors from the encoder
    hps: Hyperparams.

  Returns:
    Decoder outputs
  """

    embeddings = encoder_outputs.embeddings
    mem_input = encoder_outputs.mem_input
    batch_size = tensor_utils.shape(mem_input, 0)

    src_len, src_inputs = features["src_len"], features["src_inputs"]
    src_mask = tf.sequence_mask(src_len, tf.shape(src_inputs)[1])

    if hps.att_neighbor:
        neighbor_len = features["neighbor_len"]
        neighbor_inputs = features["neighbor_inputs"]
        neighbor_mask = tf.sequence_mask(neighbor_len,
                                         tf.shape(neighbor_inputs)[1])
        inputs = tf.concat([src_inputs, neighbor_inputs], 1)
        lens = src_len + neighbor_len
        mask = tf.concat([src_mask, neighbor_mask], axis=1)
    else:
        inputs = features["src_inputs"]
        lens = features["src_len"]
        mask = src_mask

    sparse_inputs = None
    float_mask = tf.cast(mask, dtype=tf.float32)

    if hps.use_copy:
        sparse_inputs = sparse_map(tf.expand_dims(inputs, axis=2),
                                   tf.expand_dims(float_mask, axis=2),
                                   vocab.size())
    # [batch_size, dec_len]
    decoder_inputs = features["decoder_inputs"]

    # [batch_size, dec_len, emb_dim]
    decoder_input_emb = tf.nn.embedding_lookup(embeddings, decoder_inputs)
    if mode == tf.estimator.ModeKeys.TRAIN and hps.emb_drop > 0.:
        decoder_input_emb = tf.nn.dropout(decoder_input_emb,
                                          keep_prob=1.0 - hps.emb_drop)

    def _decode(cell, helper):
        """Decode function.

    Args:
      cell: rnn cell
      helper: a helper instance from tf.contrib.seq2seq.

    Returns:
      decoded outputs and lengths.
    """
        with tf.variable_scope("decoder"):
            initial_state = cell.zero_state(batch_size, tf.float32)
            if hps.use_bridge:
                h_state, c_state = encoder_outputs.states
                initial_cell_state = tf.contrib.rnn.LSTMStateTuple(
                    h_state, c_state)
                initial_state = initial_state.clone(
                    cell_state=(initial_cell_state, ))

            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell, helper=helper, initial_state=initial_state)
        with tf.variable_scope("dynamic_decode", reuse=tf.AUTO_REUSE):
            decoder_outputs, _, decoder_len = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder, maximum_iterations=hps.max_dec_steps)
        return decoder_outputs, decoder_len

    att_context = encoder_outputs.att_context
    with tf.variable_scope("attention"):
        if hps.att_type == "luong":
            attention = tf.contrib.seq2seq.LuongAttention(
                num_units=hps.decoder_dim,
                memory=att_context,
                memory_sequence_length=lens)
        elif hps.att_type == "bahdanau":
            attention = tf.contrib.seq2seq.BahdanauAttention(
                num_units=hps.decoder_dim,
                memory=att_context,
                memory_sequence_length=lens)
        elif hps.att_type == "hyper":
            attention = HyperAttention(num_units=hps.decoder_dim,
                                       mem_input=mem_input,
                                       hps=hps,
                                       memory=att_context,
                                       use_beam=False,
                                       memory_sequence_length=lens)
        elif hps.att_type == "my":
            attention = MyAttention(num_units=hps.decoder_dim,
                                    memory=att_context,
                                    memory_sequence_length=lens,
                                    mask=mask)

    with tf.variable_scope("rnn_decoder"):
        decoder_cell = get_rnn_cell(mode=mode,
                                    hps=hps,
                                    input_dim=hps.decoder_dim + hps.emb_dim,
                                    num_units=hps.decoder_dim,
                                    num_layers=hps.num_decoder_layers,
                                    dropout=hps.decoder_drop,
                                    mem_input=mem_input,
                                    use_beam=False,
                                    cell_type=hps.rnn_cell)
    with tf.variable_scope("attention_wrapper"):
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cell,
            attention,
            attention_layer_size=hps.decoder_dim,
            alignment_history=hps.use_copy)

    with tf.variable_scope("output_projection"):
        weights = tf.transpose(embeddings) if hps.tie_embedding else None
        hidden_dim = hps.emb_dim if hps.tie_embedding else hps.decoder_dim
        decoder_cell = OutputWrapper(
            decoder_cell,
            num_layers=hps.num_mlp_layers,
            hidden_dim=hidden_dim,
            output_size=hps.output_size,
            weights=weights,
            dropout=hps.out_drop,
            use_copy=hps.use_copy,
            encoder_emb=encoder_outputs.copy_context,
            sparse_inputs=sparse_inputs,
            mask=float_mask,
            mode=mode,
            hps=hps)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if hps.sampling_probability > 0.:
            helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                inputs=decoder_input_emb,
                sequence_length=features["decoder_len"],
                embedding=embeddings,
                sampling_probability=hps.sampling_probability)
        else:
            helper = tf.contrib.seq2seq.TrainingHelper(decoder_input_emb,
                                                       features["decoder_len"])
        decoder_outputs, _ = _decode(decoder_cell, helper=helper)
        return DecoderOutputs(decoder_outputs=decoder_outputs,
                              decoder_len=features["decoder_len"]), None

    # used to compute loss
    teacher_helper = tf.contrib.seq2seq.TrainingHelper(decoder_input_emb,
                                                       features["decoder_len"])
    teacher_decoder_outputs, _ = _decode(decoder_cell, helper=teacher_helper)

    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embedding=embeddings,
        start_tokens=tf.fill([batch_size], vocab.word2id(data.START_DECODING)),
        end_token=vocab.word2id(data.STOP_DECODING))
    decoder_outputs, decoder_len = _decode(decoder_cell, helper=helper)
    return (DecoderOutputs(decoder_outputs=decoder_outputs,
                           decoder_len=decoder_len),
            DecoderOutputs(decoder_outputs=teacher_decoder_outputs,
                           decoder_len=features["decoder_len"]))
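Outside of training, basic_decoder returns two DecoderOutputs: the greedy decode and a teacher-forced decode used to compute the eval loss. A hedged usage sketch, assuming features, vocab, encoder_outputs, and hps are constructed as elsewhere in this codebase; the sample_id and rnn_output fields come from tf.contrib.seq2seq.BasicDecoderOutput, and the logits shape depends on how OutputWrapper is configured:

greedy, teacher = basic_decoder(
    features, tf.estimator.ModeKeys.EVAL, vocab, encoder_outputs, hps)
predicted_ids = greedy.decoder_outputs.sample_id    # [batch, dec_len]
eval_logits = teacher.decoder_outputs.rnn_output    # fed to the loss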
Example 4
    def __init__(self,
                 cell,
                 num_layers,
                 hidden_dim,
                 output_size,
                 weights=None,
                 activation=tf.tanh,
                 dropout=0.,
                 use_copy=False,
                 encoder_emb=None,
                 sparse_inputs=None,
                 mask=None,
                 hps=None,
                 mode=tf.estimator.ModeKeys.EVAL,
                 reuse=None):
        """Create a cell with output projection.

    Args:
      cell: an RNNCell, a projection to output_size is added to it.
      num_layers: number of MLP layers.
      hidden_dim: hidden size of the MLP.
      output_size: integer, the size of the output after projection.
      weights: (optional) a specified tensor.
      activation: (optional) an optional activation function.
      dropout: dropout rate for dropout at the output layer.
      use_copy: Use copy mechanism or not.
      encoder_emb: Outputs of the encoder.
      sparse_inputs: Sparse inputs.
      mask: mask.
      hps: Hyperparameters.
      mode: train/eval.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.

    Raises:
      TypeError: if cell is not an RNNCell.
      ValueError: if output_size is not positive.
    """
        super(OutputWrapper, self).__init__(_reuse=reuse)
        if output_size < 1:
            raise ValueError("Parameter output_size must be > 0: %d." %
                             output_size)
        self._cell = cell
        self._num_layers = num_layers
        self._activation = activation
        self._weights = weights
        if self._weights is None:
            self._output_size = output_size
        else:
            self._output_size = tensor_utils.shape(self._weights, 1)

        self._hidden_dim = hidden_dim
        self._dropout = dropout
        self._reuse = reuse
        self._sigmoid = tf.sigmoid
        self._linear1, self._linear2, self._linear_copy = None, None, None
        assert self._num_layers <= 2
        self._use_copy = use_copy
        self._encoder_emb = encoder_emb
        self._sparse_inputs, self._mask = sparse_inputs, mask
        self._mode = mode
        self._reuse_attention = hps.reuse_attention
        if self._use_copy:
            assert self._sparse_inputs is not None
            assert self._mask is not None
            if not self._reuse_attention:
                assert self._encoder_emb is not None
                # Compare static shapes; tf.shape() returns a Tensor, so the
                # original `!=` check could not fail at graph-build time.
                encoder_dim = tensor_utils.shape(self._encoder_emb, -1)
                assert encoder_dim == self._hidden_dim
        self._eps = 1e-8
        self._vocab_offset = hps.output_size
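When weights is supplied (the tie_embedding path in the decoders above), the output projection reuses the transposed embedding matrix and self._output_size is inferred from its second dimension. A minimal sketch of that convention; cell is any RNNCell, hps is assumed to carry the reuse_attention and output_size fields read by this class, and the sizes are illustrative:

import tensorflow as tf

vocab_size, emb_dim = 10000, 256
embeddings = tf.get_variable("emb", [vocab_size, emb_dim])
tied_weights = tf.transpose(embeddings)   # [emb_dim, vocab_size]
projected_cell = OutputWrapper(cell,
                               num_layers=1,
                               hidden_dim=emb_dim,   # must match the RNN output dim
                               output_size=vocab_size,  # overridden by weights' shape
                               weights=tied_weights,
                               hps=hps,
                               mode=tf.estimator.ModeKeys.TRAIN)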