Example #1
0
    def __init__(self, params):
        """Creates the embedding, LSTM stack and dropout child layers.

        The 'rnn' children consist of layer L0, built with
        `CreateBidirectionalRNNParams`, followed by layers
        L1..L(num_lstm_layers - 1), each built with
        `CreateUnidirectionalRNNParams`.

        Args:
            params: Layer params, passed straight to the base constructor.
                `packed_input` must be exactly `False`.
        """
        super(MTEncoderV1, self).__init__(params)
        p = self.params
        assert p.packed_input is False, (
            'Packed inputs are not yet supported for '
            'MTEncoderV1.')

        with tf.variable_scope(p.name):
            if p.cc_schedule is not None:
                self.CreateChild('cc_schedule', p.cc_schedule)

            self.CreateChild('emb', p.emb)

            # Bottom layer (L0) is bidirectional. `lstm_tpl_bidi` takes
            # precedence over the generic `lstm_tpl` when it is set.
            bidi_tpl = (p.lstm_tpl
                        if p.lstm_tpl_bidi is None else p.lstm_tpl_bidi)
            fwd_cell = bidi_tpl.Copy()
            fwd_cell.name = 'L0_rnn_fwd'
            fwd_cell.num_input_nodes = p.emb.embedding_dim
            fwd_cell.num_output_nodes = p.lstm_cell_size

            # The backward cell is a copy of the configured forward cell and
            # differs from it only by name.
            bak_cell = fwd_cell.Copy()
            bak_cell.name = 'L0_rnn_bak'

            bottom_layer = model_helper.CreateBidirectionalRNNParams(
                self.params, fwd_cell, bak_cell)
            bottom_layer.name = 'L0'
            stack_params = [bottom_layer]

            # Every layer above L0 is unidirectional. `lstm_tpl_uni` takes
            # precedence over the generic `lstm_tpl` when it is set.
            uni_tpl = (p.lstm_tpl
                       if p.lstm_tpl_uni is None else p.lstm_tpl_uni)
            for layer_idx in range(1, p.num_lstm_layers):
                uni_cell = uni_tpl.Copy()
                uni_cell.name = 'L%d_rnn' % layer_idx
                # L1 sits directly on the bidirectional L0 and is given an
                # input twice the cell size; deeper layers use the cell size.
                if layer_idx == 1:
                    uni_cell.num_input_nodes = 2 * p.lstm_cell_size
                else:
                    uni_cell.num_input_nodes = p.lstm_cell_size
                uni_cell.num_output_nodes = p.lstm_cell_size

                uni_layer = model_helper.CreateUnidirectionalRNNParams(
                    self.params, uni_cell)
                uni_layer.name = 'L%d' % layer_idx
                stack_params.append(uni_layer)

            self.CreateChildren('rnn', stack_params)

            # A falsy seed (None or 0) leaves the dropout seed unset;
            # otherwise a fixed offset is added to the layer's seed.
            if p.random_seed:
                dropout_seed = p.random_seed + 84828474
            else:
                dropout_seed = None
            dropout_tpl = layers.DropoutLayer.Params().Set(
                name='dropout_layer',
                keep_prob=1.0 - p.dropout_prob,
                random_seed=dropout_seed)
            self.CreateChild('dropout', dropout_tpl)
Example #2
0
    def __init__(self, params):
        """Creates the embedding, unidirectional LSTM stack and dropout.

        When `p.is_transparent` is truthy, a 'transparent_merger' child
        with one source per LSTM layer is created as well.

        Args:
            params: Layer params, passed straight to the base constructor.
                `packed_input` must be exactly `False`.
        """
        super(MTEncoderUniRNN, self).__init__(params)
        p = self.params
        assert p.packed_input is False, (
            'Packed inputs are not yet supported for '
            'MTEncoderUniRNN.')

        with tf.variable_scope(p.name):
            if p.cc_schedule is not None:
                self.CreateChild('cc_schedule', p.cc_schedule)
            else:
                self.cc_schedule = None

            self.CreateChild('emb', p.emb)

            # The first layer reads the embeddings; each later layer reads
            # the output of the layer below it.
            stack_params = []
            in_dim = p.emb.embedding_dim
            for layer_idx in range(p.num_lstm_layers):
                cell_p = p.lstm_tpl.Copy()
                cell_p.name = 'L%d_rnn' % layer_idx
                cell_p.num_input_nodes = in_dim
                cell_p.num_output_nodes = p.lstm_cell_size

                layer_p = model_helper.CreateUnidirectionalRNNParams(
                    self.params, cell_p)
                layer_p.name = 'L%d' % layer_idx
                stack_params.append(layer_p)

                in_dim = cell_p.num_output_nodes

            self.CreateChildren('rnn', stack_params)

            # A falsy seed (None or 0) leaves the dropout seed unset;
            # otherwise a fixed offset is added to the layer's seed.
            if p.random_seed:
                dropout_seed = p.random_seed + 827366448
            else:
                dropout_seed = None
            dropout_tpl = layers.DropoutLayer.Params().Set(
                name='dropout_layer',
                keep_prob=1.0 - p.dropout_prob,
                random_seed=dropout_seed)
            self.CreateChild('dropout', dropout_tpl)

            if p.is_transparent:
                merger_p = p.transparent_merger_tpl.Copy()
                merger_p.name = 'transparent'
                merger_p.num_sources = p.num_lstm_layers
                self.CreateChild('transparent_merger', merger_p)
Example #3
0
  def __init__(self, params):
    """Creates the embedding, attention RNN, RNN stack and softmax children.

    Note that `p.attention` and `p.softmax` are updated in place (dtype,
    dimensions, ...) before the corresponding children are built.

    Args:
      params: Layer params, passed straight to the base constructor.
        `emb.vocab_size` must equal `softmax.num_classes`.
    """
    super(MTDecoderV1, self).__init__(params)
    p = self.params
    assert p.emb.vocab_size == p.softmax.num_classes

    with tf.variable_scope(p.name):
      if p.cc_schedule is not None:
        self.CreateChild('cc_schedule', p.cc_schedule)
      else:
        self.cc_schedule = None

      # On TPU the device is taken from the cluster (model split 0);
      # otherwise the empty device string is used.
      emb_device = (self.cluster.WorkerDeviceInModelSplit(0)
                    if py_utils.use_tpu() else '')
      with tf.device(emb_device):
        self.CreateChild('emb', p.emb)

        # Configure the attention params in place, then take a copy for the
        # attention RNN layer.
        atten = p.attention
        atten.dtype = p.dtype
        atten.source_dim = p.source_dim
        atten.query_dim = p.rnn_cell_dim
        atten.packed_input = p.packed_input
        if atten.params_init is None:
          init_scale = 1. / math.sqrt(atten.source_dim + atten.query_dim)
          atten.params_init = py_utils.WeightInit.Gaussian(init_scale)
        atten_copy = atten.Copy()

        # Cell of the attention RNN: its input size is the embedding dim
        # plus the attention source dim.
        atten_cell_p = p.atten_rnn_cell_tpl.Copy()
        atten_cell_p.name = 'atten_rnn'
        atten_cell_p.dtype = p.dtype
        atten_cell_p.reset_cell_state = p.packed_input
        atten_cell_p.num_input_nodes = p.emb.embedding_dim + atten.source_dim
        atten_cell_p.num_output_nodes = p.rnn_cell_dim

        atten_frnn_p = p.atten_rnn_cls.Params()
        atten_frnn_p.name = 'frnn_with_atten'
        atten_frnn_p.dtype = p.dtype
        atten_frnn_p.cell = atten_cell_p.Copy()
        atten_frnn_p.attention = atten_copy
        atten_frnn_p.output_prev_atten_ctx = p.use_prev_atten_ctx
        atten_frnn_p.packed_input = p.packed_input
        atten_frnn_p.use_zero_atten_state = p.use_zero_atten_state
        atten_frnn_p.atten_context_dim = atten.source_dim
        self.CreateChild('frnn_with_atten', atten_frnn_p)

        # TODO(zhifengc): Avoid this?
        self._rnn_attn = self.frnn_with_atten.rnn_cell
        self._atten = self.frnn_with_atten.attention

        # Remaining RNN layers; indices start at 1.
        upper_layers = []
        for layer_idx in range(1, p.rnn_layers):
          cell_p = p.rnn_cell_tpl.Copy()
          cell_p.name = 'rnn%d' % layer_idx
          cell_p.dtype = p.dtype
          cell_p.num_input_nodes = p.rnn_cell_dim + atten.source_dim
          cell_p.num_output_nodes = p.rnn_cell_dim
          cell_p.reset_cell_state = p.packed_input

          frnn_p = model_helper.CreateUnidirectionalRNNParams(
              self.params, cell_p)
          frnn_p.name = 'frnn%d' % layer_idx
          frnn_p.packed_input = p.packed_input
          upper_layers.append(frnn_p)

        self.CreateChildren('frnn', upper_layers)

      # The softmax input is widened by the attention source dim when the
      # attention context vector is fed to it.
      p.softmax.dtype = p.dtype
      softmax_in_dim = p.rnn_cell_dim
      if p.feed_attention_context_vec_to_softmax:
        softmax_in_dim = p.rnn_cell_dim + p.attention.source_dim
      p.softmax.input_dim = softmax_in_dim
      self.CreateChild('softmax', p.softmax)
Example #4
0
 def CreateUnidirectionalRNNParams(self, forward_p):
     """Delegates to `model_helper.CreateUnidirectionalRNNParams`.

     Args:
       forward_p: Passed unchanged as the helper's second argument.

     Returns:
       Whatever the helper returns for this layer's params and `forward_p`.
     """
     own_params = self.params
     return model_helper.CreateUnidirectionalRNNParams(own_params, forward_p)