def __init__(self, params):
  super(MTEncoderV1, self).__init__(params)
  p = self.params
  assert p.packed_input is False, ('Packed inputs are not yet supported for '
                                   'MTEncoderV1.')

  with tf.variable_scope(p.name):
    if p.cc_schedule is not None:
      self.CreateChild('cc_schedule', p.cc_schedule)

    self.CreateChild('emb', p.emb)

    rnn_layers_params = []

    # L0 is a bi-directional lstm.

    # L0's forward lstm cell
    params = (
        p.lstm_tpl.Copy()
        if p.lstm_tpl_bidi is None else p.lstm_tpl_bidi.Copy())
    params.name = 'L0_rnn_fwd'
    params.num_input_nodes = p.emb.embedding_dim
    params.num_output_nodes = p.lstm_cell_size
    forward_lstm = params

    # L0's backward lstm cell
    params = params.Copy()
    params.name = 'L0_rnn_bak'
    backward_lstm = params

    # L0 layer.
    params = model_helper.CreateBidirectionalRNNParams(
        self.params, forward_lstm, backward_lstm)
    params.name = 'L0'
    rnn_layers_params.append(params)

    # The latter layers are all uni-directional lstm.
    input_size = 2 * p.lstm_cell_size
    for i in range(1, p.num_lstm_layers):
      # Forward lstm cell.
      cell = (
          p.lstm_tpl.Copy()
          if p.lstm_tpl_uni is None else p.lstm_tpl_uni.Copy())
      cell.name = 'L%d_rnn' % i
      cell.num_input_nodes = input_size
      cell.num_output_nodes = p.lstm_cell_size
      # Forward lstm layer.
      params = model_helper.CreateUnidirectionalRNNParams(self.params, cell)
      params.name = 'L%d' % i
      rnn_layers_params.append(params)
      input_size = p.lstm_cell_size

    self.CreateChildren('rnn', rnn_layers_params)

    dropout_p = layers.DropoutLayer.Params().Set(
        name='dropout_layer',
        keep_prob=1.0 - p.dropout_prob,
        random_seed=p.random_seed + 84828474 if p.random_seed else None)
    self.CreateChild('dropout', dropout_p)
def __init__(self, params):
  super(MTEncoderUniRNN, self).__init__(params)
  p = self.params
  assert p.packed_input is False, ('Packed inputs are not yet supported for '
                                   'MTEncoderUniRNN.')

  with tf.variable_scope(p.name):
    if p.cc_schedule is None:
      self.cc_schedule = None
    else:
      self.CreateChild('cc_schedule', p.cc_schedule)

    self.CreateChild('emb', p.emb)

    # A stack of uni-directional lstm layers: the first layer consumes the
    # embeddings, and each subsequent layer consumes the previous layer's
    # output.
    rnn_layers_params = []
    num_input_nodes = p.emb.embedding_dim
    for i in range(p.num_lstm_layers):
      cell = p.lstm_tpl.Copy()
      cell.name = 'L%d_rnn' % i
      cell.num_input_nodes = num_input_nodes
      cell.num_output_nodes = p.lstm_cell_size
      params = model_helper.CreateUnidirectionalRNNParams(self.params, cell)
      params.name = 'L%d' % i
      rnn_layers_params.append(params)
      num_input_nodes = cell.num_output_nodes

    self.CreateChildren('rnn', rnn_layers_params)

    dropout_p = layers.DropoutLayer.Params().Set(
        name='dropout_layer',
        keep_prob=1.0 - p.dropout_prob,
        random_seed=p.random_seed + 827366448 if p.random_seed else None)
    self.CreateChild('dropout', dropout_p)

    # Optionally merge the outputs of all lstm layers into a single
    # encoding via the transparent merger.
    if p.is_transparent:
      transparent_params = p.transparent_merger_tpl.Copy()
      transparent_params.name = 'transparent'
      transparent_params.num_sources = p.num_lstm_layers
      self.CreateChild('transparent_merger', transparent_params)
def __init__(self, params):
  super(MTDecoderV1, self).__init__(params)
  p = self.params
  assert p.emb.vocab_size == p.softmax.num_classes

  with tf.variable_scope(p.name):
    if p.cc_schedule is None:
      self.cc_schedule = None
    else:
      self.CreateChild('cc_schedule', p.cc_schedule)

    # On TPU, place the embedding on the first worker device of the model
    # split; otherwise let the runtime decide placement.
    if py_utils.use_tpu():
      emb_device = self.cluster.WorkerDeviceInModelSplit(0)
    else:
      emb_device = ''
    with tf.device(emb_device):
      self.CreateChild('emb', p.emb)

      p.attention.dtype = p.dtype
      p.attention.source_dim = p.source_dim
      p.attention.query_dim = p.rnn_cell_dim
      p.attention.packed_input = p.packed_input
      if p.attention.params_init is None:
        p.attention.params_init = py_utils.WeightInit.Gaussian(
            1. / math.sqrt(p.attention.source_dim + p.attention.query_dim))
      atten_params = p.attention.Copy()

      # The first layer is an lstm cell coupled with attention.
      params = p.atten_rnn_cell_tpl.Copy()
      params.name = 'atten_rnn'
      params.dtype = p.dtype
      params.reset_cell_state = p.packed_input
      params.num_input_nodes = p.emb.embedding_dim + p.attention.source_dim
      params.num_output_nodes = p.rnn_cell_dim
      atten_rnn_cell = params.Copy()

      params = p.atten_rnn_cls.Params()
      params.name = 'frnn_with_atten'
      params.dtype = p.dtype
      params.cell = atten_rnn_cell
      params.attention = atten_params
      params.output_prev_atten_ctx = p.use_prev_atten_ctx
      params.packed_input = p.packed_input
      params.use_zero_atten_state = p.use_zero_atten_state
      params.atten_context_dim = p.attention.source_dim
      self.CreateChild('frnn_with_atten', params)
      # TODO(zhifengc): Avoid this?
      self._rnn_attn = self.frnn_with_atten.rnn_cell
      self._atten = self.frnn_with_atten.attention

      # The remaining layers are uni-directional lstms whose inputs are the
      # previous layer's output concatenated with the attention context.
      rnn_layers_params = []
      for i in range(1, p.rnn_layers):
        params = p.rnn_cell_tpl.Copy()
        params.name = 'rnn%d' % i
        params.dtype = p.dtype
        params.num_input_nodes = p.rnn_cell_dim + p.attention.source_dim
        params.num_output_nodes = p.rnn_cell_dim
        params.reset_cell_state = p.packed_input
        rnn_cell_p = params

        params = model_helper.CreateUnidirectionalRNNParams(
            self.params, rnn_cell_p)
        params.name = 'frnn%d' % i
        params.packed_input = p.packed_input
        rnn_layers_params.append(params)

      self.CreateChildren('frnn', rnn_layers_params)

      p.softmax.dtype = p.dtype
      if p.feed_attention_context_vec_to_softmax:
        p.softmax.input_dim = p.rnn_cell_dim + p.attention.source_dim
      else:
        p.softmax.input_dim = p.rnn_cell_dim
      self.CreateChild('softmax', p.softmax)
def CreateUnidirectionalRNNParams(self, forward_p):
  return model_helper.CreateUnidirectionalRNNParams(self.params, forward_p)
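# The commented sketch below is illustrative and not part of the original
# source. It shows one plausible way to configure and build MTEncoderV1 from
# the params fields used above, following the usual lingvo hyperparams
# pattern (Params() / Instantiate()). The concrete values are assumptions.
#
#   p = MTEncoderV1.Params()
#   p.name = 'mt_encoder'
#   p.emb.vocab_size = 32000     # assumed vocabulary size
#   p.emb.embedding_dim = 1024   # feeds L0's num_input_nodes above
#   p.lstm_cell_size = 1024      # output size of every lstm layer
#   p.num_lstm_layers = 4        # 1 bi-directional + 3 uni-directional
#   p.dropout_prob = 0.2
#   encoder = p.Instantiate()    # creates children: emb, rnn, dropout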