Exemple #1
0
 def _build_pre(self):
     """Create the two-layer LSTM stack and set the update-rule constants.

     Sets ``self.dimA`` (LSTM width), ``self.cellA`` (the stacked cell),
     the decay constants ``self.b1`` / ``self.b2``, the step scale
     ``self.lr`` and the constant ``self.eps``.
     """
     self.dimA = 20
     # One LSTMCell object per layer. ``[cell] * 2`` would put the *same*
     # object in both layers, which TensorFlow 1.x either rejects or treats
     # as weight sharing between the layers.
     self.cellA = MultiRNNCell([LSTMCell(self.dimA) for _ in range(2)])
     self.b1 = 0.95
     self.b2 = 0.95
     self.lr = 0.1
     self.eps = 1e-8
Exemple #2
0
    def impress(self, state_code, pre_impress_states):
        """Feed ``state_code`` through a 3-layer LSTM stack for one step.

        Args:
            state_code: Input handed to the LSTM stack at time 0.
            pre_impress_states: Initial state for the stack. ``None`` selects
                the stack's zero state for ``self.batch_size``.

        Returns:
            A tuple ``(state_impress_code, final_loop_state)``.
            ``state_impress_code`` is index 0 of the batch-major stacked
            outputs emitted by ``tf.nn.raw_rnn``; ``final_loop_state`` is the
            loop state reported by ``tf.nn.raw_rnn`` (``loop_fn`` always sets
            it to ``None``).
        """
        # LSTM, 3 layers
        self.impress_lay_num = 3
        with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):
            def loop_fn(time, cell_output, cell_state, loop_state):
                if cell_output is None:  # time = 0
                    # Initialization. ``initial_state`` is assigned further
                    # down, before raw_rnn invokes this callback.
                    next_input = state_code
                    next_state = initial_state
                    emit_output = None
                else:
                    next_input = cell_output
                    emit_output = cell_output
                    next_state = cell_state
                loop_state = None

                # Finished from time 1 onwards.
                elements_finished = (time >= 1)
                return (elements_finished, next_input, next_state,
                        emit_output, loop_state)

            multirnn_cell = MultiRNNCell(
                [LSTMCell(self.impress_dim)
                 for _ in range(self.impress_lay_num)],
                state_is_tuple=True)

            # Identity check: ``== None`` would go through the state
            # object's own ``__eq__`` instead of testing for "no state given".
            if pre_impress_states is None:
                initial_state = multirnn_cell.zero_state(self.batch_size,
                                                         tf.float32)
            else:
                initial_state = pre_impress_states

            emit_ta, states, final_loop_state = tf.nn.raw_rnn(multirnn_cell,
                                                              loop_fn)
            # Transpose to put the batch dimension first, then take index 0.
            # NOTE(review): after the transpose, ``[0]`` indexes the batch
            # axis, not the time axis - confirm this is intended.
            state_impress_code = tf.transpose(emit_ta.stack(), [1, 0, 2])[0]

            # NOTE(review): ``states`` (the final RNN state) is discarded and
            # the always-None loop state is returned instead - confirm
            # against callers before changing.
            return state_impress_code, final_loop_state
Exemple #3
0
 def _build_pre(self, size):
     """Create the stacked LSTM of width ``size`` and set update constants.

     Args:
         size: Number of units in every LSTM layer (stored as ``self.dimA``).
     """
     self.dimA = size
     self.num_of_layers = 2
     # A separate LSTMCell object for each layer of the stack.
     layer_cells = [
         LSTMCell(num_units=self.dimA) for _ in range(self.num_of_layers)
     ]
     self.cellA = MultiRNNCell(layer_cells)
     self.b1 = 0.95
     self.b2 = 0.95
     self.lr = 0.1
     self.eps = 1e-8
    def _build_decoder_cell(self):
        """Build the plain and the beam-search decoder cells with attention.

        Two Luong attention mechanisms are created under the variable scope
        'shared_attention_mechanism' (the second with ``reuse=True``): one
        over the encoder tensors as-is, one over the same tensors tiled
        ``self.beam_width`` times. In both variants only the last decoder
        layer is wrapped in an ``AttentionWrapper``.

        Returns:
            A 4-tuple ``(cell, initial_state, beam_cell, beam_initial_state)``
            where each cell is a ``MultiRNNCell`` and each initial state is a
            tuple whose last entry is the attention wrapper's zero state.
        """
        # no beam
        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length

        def attn_decoder_input_fn(inputs, attention):
            # Passed as ``cell_input_fn``: returns the inputs untouched unless
            # ``self.attn_input_feeding`` is set, in which case the inputs and
            # the attention vector are concatenated and projected to
            # ``self.hidden_units``.
            if not self.attn_input_feeding:
                return inputs
            _input_layer = Dense(self.hidden_units, dtype=self.dtype, name="attn_input_feeding")
            return _input_layer(array_ops.concat([inputs, attention], -1))
        
        # attention mechanism 'luong'
        with tf.variable_scope('shared_attention_mechanism'):
            self.attention_mechanism = attention_wrapper.LuongAttention(num_units=self.hidden_units, \
                                                                        memory=encoder_outputs, memory_sequence_length=encoder_inputs_length)        
        # build decoder cell
        # One cell per layer; the same cell objects are used by both the
        # plain and the beam decoder lists built below.
        self.init_decoder_cell_list = [self._build_single_cell() for i in range(self.depth)]
        # Placeholder value only: overwritten once the attention wrapper's
        # zero state has been spliced in below.
        decoder_initial_state = encoder_last_state
        
        # All layers but the last are used as-is; the last one is wrapped
        # with attention.
        self.decoder_cell_list = self.init_decoder_cell_list[:-1] + [attention_wrapper.AttentionWrapper(\
            cell = self.init_decoder_cell_list[-1], \
            attention_mechanism=self.attention_mechanism,\
            attention_layer_size=self.hidden_units,\
            cell_input_fn=attn_decoder_input_fn,\
            initial_cell_state=encoder_last_state[-1],\
            alignment_history=False)]
        batch_size = self.batch_size
        # Copy the encoder states, then replace the last entry with the
        # wrapped layer's zero state.
        initial_state = [state for state in encoder_last_state]
        initial_state[-1] = self.decoder_cell_list[-1].zero_state(batch_size=batch_size, dtype=self.dtype)
        decoder_initial_state = tuple(initial_state)
        
        # beam
        # Tile every encoder tensor ``self.beam_width`` times.
        beam_encoder_outputs = seq2seq.tile_batch(self.encoder_outputs, multiplier=self.beam_width)
        beam_encoder_last_state = nest.map_structure(lambda s: seq2seq.tile_batch(s, self.beam_width), self.encoder_last_state)
        beam_encoder_inputs_length = seq2seq.tile_batch(self.encoder_inputs_length, multiplier=self.beam_width)

        # Same scope name with reuse=True, now over the tiled memory.
        with tf.variable_scope('shared_attention_mechanism', reuse=True):
            self.beam_attention_mechanism = attention_wrapper.LuongAttention(num_units=self.hidden_units, \
                                                                             memory=beam_encoder_outputs, \
                                                                             memory_sequence_length=beam_encoder_inputs_length)

        # Placeholder value only: overwritten below, as in the non-beam path.
        beam_decoder_initial_state = beam_encoder_last_state
        # NOTE(review): this wraps the same last-layer cell object as the
        # non-beam wrapper above - presumably to share weights; confirm.
        self.beam_decoder_cell_list = self.init_decoder_cell_list[:-1] + [attention_wrapper.AttentionWrapper(\
            cell = self.init_decoder_cell_list[-1], \
            attention_mechanism=self.beam_attention_mechanism,\
            attention_layer_size=self.hidden_units,\
            cell_input_fn=attn_decoder_input_fn,\
            initial_cell_state=beam_encoder_last_state[-1],\
            alignment_history=False)]
            
        # The zero state of the beam wrapper is sized for
        # batch_size * beam_width.
        beam_batch_size = self.batch_size * self.beam_width
        beam_initial_state = [state for state in beam_encoder_last_state]
        beam_initial_state[-1] = self.beam_decoder_cell_list[-1].zero_state(batch_size=beam_batch_size, dtype=self.dtype)
        beam_decoder_initial_state = tuple(beam_initial_state)
        
        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state, \
               MultiRNNCell(self.beam_decoder_cell_list), beam_decoder_initial_state
Exemple #5
0
    def createGraph(self):
        """Build the training graph and a batch-size-1 sampling graph.

        Training side: creates the ``self.input`` / ``self.targs``
        placeholders, one-hot encodes the inputs, runs a stacked GRU through
        ``seq2seq.rnn_decoder``, projects each output with ``w`` / ``b`` and
        stores the sequence loss in ``self.loss`` and the Adam minimize op in
        ``self.opt``.

        Sampling side (scope "posRNN" reopened with ``reuse=True``): creates
        ``self.s_inputs``, runs the same stacked cell for a single step and
        stores the softmax over the vocabulary in ``self.s_probs``.
        """
        self.input = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='inputs')
        self.targs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='targets')
        onehot = tf.one_hot(self.input, self.vocab_size, name='input_oh')

        # Split along axis 1 into ``seq_len`` pieces and drop that axis from
        # each input piece.
        inputs = tf.split(onehot, self.seq_len, 1)
        inputs = [tf.squeeze(i, [1]) for i in inputs]
        targets = tf.split(self.targs, self.seq_len, 1)

        with tf.variable_scope("posRNN"):

            cells = [GRUCell(self.num_hidden) for _ in range(self.num_layers)]

            stacked = MultiRNNCell(cells, state_is_tuple=True)
            self.zero_state = stacked.zero_state(self.batch_size, tf.float32)

            outputs, self.last_state = seq2seq.rnn_decoder(
                inputs, self.zero_state, stacked)

            # Output projection from hidden size to vocabulary size.
            w = tf.get_variable(
                "w", [self.num_hidden, self.vocab_size],
                tf.float32,
                initializer=tf.random_normal_initializer(stddev=0.02))
            b = tf.get_variable("b", [self.vocab_size],
                                initializer=tf.constant_initializer(0.0))
            logits = [tf.matmul(o, w) + b for o in outputs]

            # Uniform weights of one for every step. ``range`` is used here
            # because ``xrange`` only exists on Python 2, and this function
            # already uses ``range`` for the cell list above.
            const_weights = [
                tf.ones([self.batch_size]) for _ in range(self.seq_len)
            ]
            self.loss = seq2seq.sequence_loss(logits, targets, const_weights)

            self.opt = tf.train.AdamOptimizer(0.001,
                                              beta1=0.5).minimize(self.loss)

        with tf.variable_scope("posRNN", reuse=True):

            batch_size = 1
            self.s_inputs = tf.placeholder(tf.int32, [batch_size],
                                           name='s_inputs')
            s_onehot = tf.one_hot(self.s_inputs,
                                  self.vocab_size,
                                  name='s_input_oh')

            self.s_zero_state = stacked.zero_state(batch_size, tf.float32)
            s_outputs, self.s_last_state = seq2seq.rnn_decoder(
                [s_onehot], self.s_zero_state, stacked)
            # Single step, single sample: flatten to one row before the
            # projection.
            s_outputs = tf.reshape(s_outputs, [1, self.num_hidden])
            self.s_probs = tf.nn.softmax(tf.matmul(s_outputs, w) + b)
def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0,
                    activation=None):
    """Build a single RNN cell or a ``MultiRNNCell`` stack.

    Args:
        cell_type: One of ``'rnn'``, ``'lstm'`` or ``'gru'``.
        num_units: Either a number of units (used for every layer) or a list
            with one entry per layer; a list overrides ``num_layers``.
        num_layers: Number of layers when ``num_units`` is not a list.
        dp_input_keep_prob: Input keep probability; a ``DropoutWrapper`` is
            added when this or ``dp_output_keep_prob`` differs from 1.0.
        dp_output_keep_prob: Output keep probability.
        activation: ``None`` or one of ``'relu'``, ``'sigmoid'``, ``'elu'``.

    Returns:
        The bare or dropout-wrapped cell for a single layer, otherwise a
        ``MultiRNNCell`` over the per-layer cells.

    Raises:
        ValueError: If ``cell_type`` or ``activation`` is not supported.
    """
    def single_cell(num_units):
        if cell_type == 'rnn':
            cell_class = BasicRNNCell
        elif cell_type == 'lstm':
            cell_class = LSTMCell
        elif cell_type == 'gru':
            cell_class = GRUCell
        else:
            raise ValueError('Cell Type Not Supported! ')

        if activation is None:
            activation_f = None
        elif activation == 'relu':
            activation_f = tf.nn.relu
        elif activation == 'sigmoid':
            activation_f = tf.sigmoid
        elif activation == 'elu':
            activation_f = tf.nn.elu
        else:
            raise ValueError('Activation Function Not Supported! ')

        # Always forward the activation. Previously it was only passed on
        # the dropout path, so a requested activation was silently ignored
        # whenever both keep probabilities were 1.0.
        cell = cell_class(num_units=num_units, activation=activation_f)

        if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
            return DropoutWrapper(cell,
                                  input_keep_prob=dp_input_keep_prob,
                                  output_keep_prob=dp_output_keep_prob)
        return cell

    if isinstance(num_units, list):
        # Per-layer sizes: the list length decides the depth.
        num_layers = len(num_units)
        if num_layers > 1:
            return MultiRNNCell(
                [single_cell(num_units[i]) for i in range(num_layers)])
        return single_cell(num_units[0])

    if num_layers > 1:
        return MultiRNNCell(
            [single_cell(num_units) for _ in range(num_layers)])
    return single_cell(num_units)
Exemple #7
0
class RNNpropModel(nn_opt.BasicNNOptModel):
    """Learned update rule driven by a two-layer LSTM.

    The state carried between iterations is the list
    ``[sid, b1t, b2t, x, m, v, cellA_state]``. ``self.ph``, ``self.fc`` and
    ``self._get_fx`` come from the base class, which is not visible here.
    """

    def _build_pre(self):
        """Create the LSTM stack and set the update-rule constants."""
        self.dimA = 20
        # One LSTMCell object per layer. ``[cell] * 2`` would put the *same*
        # object in both layers, which TensorFlow 1.x either rejects or
        # treats as weight sharing between the layers.
        self.cellA = MultiRNNCell([LSTMCell(self.dimA) for _ in range(2)])
        self.b1 = 0.95
        self.b2 = 0.95
        self.lr = 0.1
        self.eps = 1e-8

    def _build_input(self):
        """Create the inputs (via ``self.ph``) for every state component."""
        self.x = self.ph([None])
        self.m = self.ph([None])
        self.v = self.ph([None])
        self.b1t = self.ph([])
        self.b2t = self.ph([])
        self.sid = self.ph([])
        # One (c, h) pair per LSTM layer, sized from the cell's state_size.
        self.cellA_state = tuple(
            (self.ph([None, size.c]), self.ph([None, size.h]))
            for size in self.cellA.state_size)
        self.input_state = [
            self.sid, self.b1t, self.b2t, self.x, self.m, self.v,
            self.cellA_state
        ]

    def _build_initial(self):
        """Build the initial state: zero moments, unit decay products."""
        x = self.x
        m = tf.zeros(shape=tf.shape(x))
        v = tf.zeros(shape=tf.shape(x))
        b1t = tf.ones([])
        b2t = tf.ones([])
        # The LSTM "batch" is the number of elements of x.
        cellA_state = self.cellA.zero_state(tf.size(x), tf.float32)
        self.initial_state = [tf.zeros([]), b1t, b2t, x, m, v, cellA_state]

    # return state, fx
    def _iter(self, f, i, state):
        """Run one update step.

        Args:
            f: Forwarded to ``self._get_fx``.
            i: Forwarded to ``self._get_fx``.
            state: ``[sid, b1t, b2t, x, m, v, cellA_state]``.

        Returns:
            ``(new_state, fx)`` where ``new_state`` has the same layout as
            ``state`` and ``fx`` is the first value returned by
            ``self._get_fx``.
        """
        sid, b1t, b2t, x, m, v, cellA_state = state

        fx, grad = self._get_fx(f, i, x)
        # The gradient is treated as a constant input to the update rule.
        grad = tf.stop_gradient(grad)

        # Exponential moving averages of the gradient and its square.
        m = self.b1 * m + (1 - self.b1) * grad
        v = self.b2 * v + (1 - self.b2) * (grad**2)

        # Running products of the decay constants, used for bias correction.
        b1t *= self.b1
        b2t *= self.b2

        sv = tf.sqrt(v / (1 - b2t)) + self.eps

        # Two features per element: scaled gradient and scaled corrected
        # first moment.
        last = tf.stack([grad / sv, (m / (1 - b1t)) / sv], 1)
        last = tf.nn.elu(self.fc(last, 20))

        with tf.variable_scope("cellA"):
            lastA, cellA_state = self.cellA(last, cellA_state)
        with tf.variable_scope("fc_A"):
            a = self.fc(lastA, 1)[:, 0]

        # tanh bounds each element's step to [-lr, lr].
        a = tf.tanh(a) * self.lr
        x -= a

        return [sid + 1, b1t, b2t, x, m, v, cellA_state], fx
Exemple #8
0
def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0):
    """Build one RNN cell or a ``MultiRNNCell`` from per-layer sizes.

    Args:
        cell_type: ``'rnn'``, ``'gru'`` or ``'lstm'``.
        num_units: Sequence of layer sizes; its length must equal
            ``num_layers``.
        num_layers: Number of layers to build.
        dp_input_keep_prob: Input keep probability for ``DropoutWrapper``.
        dp_output_keep_prob: Output keep probability for ``DropoutWrapper``.

    Returns:
        A single (possibly dropout-wrapped) cell when ``num_layers`` is not
        greater than 1, otherwise a ``MultiRNNCell``.

    Raises:
        ValueError: If ``cell_type`` is not supported.
    """
    def single_cell(num_units):
        if cell_type == 'lstm':
            cell_class = LSTMCell
        elif cell_type == 'gru':
            cell_class = GRUCell
        elif cell_type == 'rnn':
            cell_class = BasicRNNCell
        else:
            raise ValueError('Cell Type Not Supported! ')

        bare_cell = cell_class(num_units=num_units)
        use_dropout = dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0
        if not use_dropout:
            return bare_cell
        return DropoutWrapper(bare_cell,
                              input_keep_prob=dp_input_keep_prob,
                              output_keep_prob=dp_output_keep_prob)

    assert len(num_units) == num_layers
    if num_layers > 1:
        layer_cells = [single_cell(num_units[idx])
                       for idx in range(num_layers)]
        return MultiRNNCell(layer_cells)
    return single_cell(num_units[0])
    def Encoder(self, xs):
        """Encode a batch of id sequences with a bidirectional LSTM stack.

        ``xs`` is cast to int32, one-hot encoded over ``self.val_size_x`` and
        passed through ``self.WordEmb``. For each batch row, the concatenated
        forward/backward outputs at position ``inputs_length[i] - 1`` are
        taken and projected to ``self.encoder_units`` with a ReLU dense
        layer.

        Args:
            xs: Token ids to encode.

        Returns:
            The dense-projected encoding, one row per batch element.

        Raises:
            ValueError: If neither ``self.args.train`` nor ``self.args.test``
                is set, so no sequence-length input can be chosen.
        """
        encoder_input = tf.one_hot(tf.cast(xs, tf.int32), self.val_size_x)
        encoder_input = self.WordEmb(encoder_input)

        if self.args.train:
            inputs_length = self.inputs_length_PH
        elif self.args.test:
            inputs_length = self.inputs_length_test_PH
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ``inputs_length``.
            raise ValueError(
                'Encoder requires either args.train or args.test to be set')

        def build_stack():
            # A fresh stack of LSTM cells, one cell object per layer.
            return MultiRNNCell(
                [LSTMCell(self.encoder_units)
                 for _ in range(self.encoder_lay_Num)],
                state_is_tuple=True)

        # The two directions need their own cell objects: handing the same
        # MultiRNNCell to both cell_fw and cell_bw either fails or ties the
        # forward and backward weights together, depending on the TF version.
        (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=build_stack(),
            cell_bw=build_stack(),
            inputs=encoder_input,
            sequence_length=inputs_length,
            dtype=self.dtype)

        sentence_code = tf.concat((fw_outputs, bw_outputs), axis=2)

        # Pick, per batch row, the output at that row's last valid position.
        sentence_code_ = [
            sentence_code[i, inputs_length[i] - 1, :]
            for i in range(self.batch_size)
        ]
        encoder_output = tf.stack(sentence_code_)

        encoder_output = tf.layers.dense(inputs=encoder_output,
                                         units=self.encoder_units,
                                         activation=tf.nn.relu)
        return encoder_output
Exemple #10
0
    def build_decoder_cell(self):
        """Build the attention decoder cell and its initial state.

        When ``self.use_beamsearch_decode`` is set, the encoder outputs, last
        state and input lengths are tiled ``self.beam_width`` times first.
        Only the top decoder layer is wrapped in an ``AttentionWrapper``.

        Returns:
            ``(cell, decoder_initial_state)``: a ``MultiRNNCell`` over
            ``self.decoder_cell_list`` and a tuple of per-layer states whose
            last entry is the attention wrapper's zero state.
        """

        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length

        # For beam search, shadow the three locals with tiled copies.
        if self.use_beamsearch_decode:
            print ("use beamsearch decoding..")
            encoder_outputs = seq2seq.tile_batch(
                self.encoder_outputs, multiplier=self.beam_width)
            encoder_last_state = nest.map_structure(
                lambda s: seq2seq.tile_batch(s, self.beam_width), self.encoder_last_state)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)

        # Building attention mechanism: Default Bahdanau
        # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
        # NOTE(review): the Bahdanau mechanism is constructed even when it is
        # replaced by Luong just below - confirm whether that is intended.
        self.attention_mechanism = attention_wrapper.BahdanauAttention(
            num_units=self.hidden_units, memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length,) 
        # 'Luong' style attention: https://arxiv.org/abs/1508.04025
        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = attention_wrapper.LuongAttention(
                num_units=self.hidden_units, memory=encoder_outputs, 
                memory_sequence_length=encoder_inputs_length,)
 
        # Building decoder_cell
        self.decoder_cell_list = [
            self.build_single_cell() for i in range(self.depth)]
        # Placeholder value only: overwritten after the attention wrapper's
        # zero state is spliced in below.
        decoder_initial_state = encoder_last_state

        def attn_decoder_input_fn(inputs, attention):
            # Passed as ``cell_input_fn``: returns the inputs untouched unless
            # ``self.attn_input_feeding`` is set.
            if not self.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            # Concatenated inputs + attention are projected to hidden_units.
            _input_layer = Dense(self.hidden_units, dtype=self.dtype,
                                 name='attn_input_feeding')
            return _input_layer(array_ops.concat([inputs, attention], -1))

        # AttentionWrapper wraps RNNCell with the attention_mechanism
        # Note: We implement Attention mechanism only on the top decoder layer
        self.decoder_cell_list[-1] = attention_wrapper.AttentionWrapper(
            cell=self.decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],
            alignment_history=False,
            name='Attention_Wrapper')

        # The wrapper's zero state must be sized for the tiled batch when
        # beam search is active.
        batch_size = self.batch_size if not self.use_beamsearch_decode \
                     else self.batch_size * self.beam_width
        # Copy the encoder states, then replace the last entry.
        initial_state = [state for state in encoder_last_state]

        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
          batch_size=batch_size, dtype=self.dtype)
        decoder_initial_state = tuple(initial_state)

        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state
Exemple #11
0
 def _create_rnn_cell(self):
     """Build the RNN cell described by ``self.cfg``.

     Returns:
         A single cell from ``self._create_single_rnn_cell`` when
         ``cfg["num_layers"]`` is ``None`` or at most 1, otherwise a
         ``MultiRNNCell`` with ``cfg["num_layers"]`` such cells.
     """
     if self.cfg["num_layers"] is None or self.cfg["num_layers"] <= 1:
         return self._create_single_rnn_cell(self.cfg["num_units"])
     # The stacked cell used to be constructed and then discarded because
     # the ``return`` was missing, so multi-layer configs produced None.
     return MultiRNNCell([
         self._create_single_rnn_cell(self.cfg["num_units"])
         for _ in range(self.cfg["num_layers"])
     ])
Exemple #12
0
    def Decoder(self, encoder_output):
        """Unroll a stacked-LSTM decoder with ``tf.nn.raw_rnn``.

        At time 0 the cell input is ``encoder_output`` concatenated with
        itself. Afterwards it is an embedded token concatenated with
        ``encoder_output``: the argmax of the projected previous output when
        ``self.args.test`` is set, or column ``time - 1`` of ``self.ys_PH``
        when ``self.args.train`` is set.

        Args:
            encoder_output: Encoder result fed to the decoder at every step.

        Returns:
            Logits reshaped to
            ``(self.batch_size, self.max_len, self.val_size)``.
        """
        def embed_ids(token_ids):
            # One-hot over the vocabulary, then the shared word embedding.
            one_hot = tf.one_hot(token_ids,
                                 self.val_size,
                                 on_value=1.0,
                                 off_value=0.0,
                                 axis=-1)
            return self.WordEmb(one_hot)

        def loop_fn(time, cell_output, cell_state, loop_state):
            if cell_output is None:  # time = 0
                # First call: build the initial input and the zero state.
                next_input = tf.concat((encoder_output, encoder_output),
                                       axis=1)
                next_state = decoder_cell.zero_state(self.batch_size,
                                                     tf.float32)
                emitted = None
            else:
                emitted = cell_output
                if self.args.test:
                    # Project to vocabulary scores and feed back the argmax.
                    vocab_scores = tf.nn.xw_plus_b(cell_output,
                                                   self.decoder_W,
                                                   self.decoder_b)
                    prev_token = embed_ids(tf.argmax(vocab_scores, 1))
                elif self.args.train:
                    # Feed the target token of the previous step.
                    prev_token = embed_ids(self.ys_PH[:, (time - 1)])

                next_input = tf.concat([prev_token, encoder_output], axis=1)
                next_state = cell_state
            finished = (time >= self.max_len)
            return (finished, next_input, next_state, emitted, None)

        decoder_cell = MultiRNNCell(
            [LSTMCell(self.decoder_units) for _ in range(self.lay_num)],
            state_is_tuple=True)
        emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(
            decoder_cell, loop_fn)

        # Put the batch dimension first.
        batch_major = tf.transpose(emit_ta.stack(), [1, 0, 2])

        # Project every step's output from decoder_units to val_size.
        flat_outputs = tf.reshape(batch_major, [-1, self.decoder_units])
        flat_logits = tf.nn.xw_plus_b(flat_outputs, self.decoder_W,
                                      self.decoder_b)
        return tf.reshape(flat_logits,
                          (self.batch_size, self.max_len, self.val_size))
Exemple #13
0
    def build_dec_cell(self, hidden_size):
        """Build the decoder ``MultiRNNCell`` and its initial state.

        With ``self.use_beam_search`` the encoder tensors are tiled
        ``self.p.beam_width`` times first. With ``self.p.use_attn`` the last
        decoder layer is wrapped in an ``AttentionWrapper`` and the last
        entry of the initial state becomes that wrapper's zero state.

        Args:
            hidden_size: Units for the attention mechanism, the attention
                layer, the input-feeding projection and each decoder cell.

        Returns:
            ``(cell, dec_initial_state)``.
        """
        memory = self.enc_outputs
        encoder_state = self.enc_last_state
        memory_lengths = self.enc_inp_len

        if self.use_beam_search:
            self.logger.info("using beam search decoding")
            memory = seq2seq.tile_batch(self.enc_outputs,
                                        multiplier=self.p.beam_width)
            encoder_state = nest.map_structure(
                lambda s: seq2seq.tile_batch(s, self.p.beam_width),
                self.enc_last_state)
            memory_lengths = seq2seq.tile_batch(self.enc_inp_len,
                                                self.p.beam_width)

        # Luong when requested, Bahdanau for every other setting.
        if self.p.attention_type.lower() == 'luong':
            mechanism_cls = attention_wrapper.LuongAttention
        else:
            mechanism_cls = attention_wrapper.BahdanauAttention
        self.attention_mechanism = mechanism_cls(
            num_units=hidden_size,
            memory=memory,
            memory_sequence_length=memory_lengths)

        def attn_dec_input_fn(inputs, attention):
            # Input feeding: project [inputs, attention] to hidden_size,
            # or pass the inputs through when the option is off.
            if self.p.attn_input_feeding:
                projection = Dense(hidden_size,
                                   dtype=self.p.dtype,
                                   name='attn_input_feeding')
                return projection(tf.concat([inputs, attention], -1))
            return inputs

        self.dec_cell_list = [
            self.build_single_cell(hidden_size) for _ in range(self.p.depth)
        ]

        if self.p.use_attn:
            self.dec_cell_list[-1] = attention_wrapper.AttentionWrapper(
                cell=self.dec_cell_list[-1],
                attention_mechanism=self.attention_mechanism,
                attention_layer_size=hidden_size,
                cell_input_fn=attn_dec_input_fn,
                initial_cell_state=encoder_state[-1],
                alignment_history=False,
                name='attention_wrapper')

        # The zero state is sized for the tiled batch under beam search.
        if self.use_beam_search:
            batch_size = self.p.batch_size * self.p.beam_width
        else:
            batch_size = self.p.batch_size

        layer_states = list(encoder_state)
        if self.p.use_attn:
            layer_states[-1] = self.dec_cell_list[-1].zero_state(
                batch_size=batch_size, dtype=self.p.dtype)

        return MultiRNNCell(self.dec_cell_list), tuple(layer_states)
def stacked_rnn_step(input_vocabulary_size,
                     hidden_size=13,
                     emb_dim=11,
                     n_layers=2,
                     variable_scope='encdec'):
    """Build an embedding -> input projection -> stacked LSTM cell.

    Args:
        input_vocabulary_size: Vocabulary size given to ``EmbeddingWrapper``.
        hidden_size: Units of each LSTM layer.
        emb_dim: Embedding size, also passed to ``InputProjectionWrapper``.
        n_layers: Number of stacked LSTM layers.
        variable_scope: Scope under which the LSTM stack and the projection
            wrapper are created.

    Returns:
        The ``EmbeddingWrapper`` around the projected stacked cell.
    """
    with tf.variable_scope(variable_scope, reuse=None):
        # One LSTMCell object per layer. ``[cell] * n_layers`` would repeat
        # the *same* object, which TensorFlow 1.x either rejects or treats
        # as weight sharing between the layers.
        rnn_cell = MultiRNNCell(
            [LSTMCell(hidden_size) for _ in range(n_layers)])  # stacked LSTM
        proj_wrapper = InputProjectionWrapper(rnn_cell, emb_dim)
    embedding_wrapper = EmbeddingWrapper(proj_wrapper, input_vocabulary_size,
                                         emb_dim)
    return embedding_wrapper
Exemple #15
0
def build_decoder_cell(rank, u_emb, batch_size, depth=2):
  """Build a ``depth``-layer LSTM decoder and its initial state.

  The first layer is a plain ``LSTMCell``; every further layer is an
  ``LSTMCell`` inside a ``ResidualWrapper``. The first layer starts from an
  ``LSTMStateTuple`` with zeros shaped like ``u_emb`` as the first field and
  ``u_emb`` as the second; the other layers start from their zero state.

  Returns:
    ``(MultiRNNCell, tuple_of_initial_states)``.
  """
  layers = []
  for layer_idx in range(depth):
    lstm = LSTMCell(rank, state_is_tuple=True)
    layers.append(lstm if layer_idx == 0 else ResidualWrapper(lstm))

  first_state = LSTMStateTuple(tf.zeros_like(u_emb), u_emb)
  upper_states = [
      layer.zero_state(batch_size, tf.float32) for layer in layers[1:]
  ]
  return MultiRNNCell(layers), (first_state,) + tuple(upper_states)
Exemple #16
0
    def model(data, weights, biases):
        """Build the stacked-LSTM classifier and return its softmax output.

        NOTE(review): the body reads the outer ``train_data_node`` and
        ``train_labels_node`` instead of the ``data`` argument - confirm
        whether ``data`` was meant to be used.

        Args:
            data: Not used by the body.
            weights: Mapping with an ``'out'`` weight matrix.
            biases: Mapping with an ``'out'`` bias.

        Returns:
            Softmax of ``last @ weights['out'] + biases['out']``.
        """
        # The same cell object is repeated NUM_LAYERS times, as in the
        # original.
        stacked_cell = MultiRNNCell([LSTMCell(NUM_NEURONS)] * NUM_LAYERS)

        rnn_outputs, _ = tf.nn.rnn(stacked_cell, train_data_node,
                                   dtype=DATA_TYPE)
        rnn_outputs = tf.transpose(rnn_outputs, [1, 0, 2])
        # Take the last slice along the leading axis after the transpose.
        last_index = int(rnn_outputs.get_shape()[0]) - 1
        last = tf.gather(rnn_outputs, last_index)
        # Kept although unused: it still evaluates the label shape.
        out_size = int(train_labels_node.get_shape()[1])

        logits = tf.matmul(last, weights['out']) + biases['out']
        # cross_entropy = -tf.reduce_sum(train_labels_node * tf.log(prediction))
        return tf.nn.softmax(logits)
Exemple #17
0
 def prediction(self):
     """Build the stacked-GRU classifier and return its softmax output.

     NOTE(review): ``data`` is read as a bare name rather than from
     ``self`` - confirm it is defined in the enclosing module.

     Returns:
         Softmax over the projection of the last gathered RNN output.
     """
     def make_layer():
         # A fresh dropout-wrapped GRU cell for one layer.
         return DropoutWrapper(GRUCell(self._num_hidden),
                               output_keep_prob=self.dropout)

     # Recurrent network. Each layer gets its own cell object;
     # ``[network] * n`` would repeat one object, which TensorFlow 1.x
     # either rejects or treats as weight sharing between the layers.
     network = MultiRNNCell(
         [make_layer() for _ in range(self._num_layers)])
     output, _ = tf.nn.dynamic_rnn(network, data, dtype=tf.float32)
     # Select last output.
     output = tf.transpose(output, [1, 0, 2])
     last = tf.gather(output, int(output.get_shape()[0]) - 1)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Exemple #18
0
    def build_cell(self, hidden_units, depth=1):
        '''Stack ``depth`` single cells into one ``MultiRNNCell``.

            Args:
              hidden_units: Units passed to ``self.build_single_cell``.
              depth: The number of cell layers.

            Returns:
              A ``MultiRNNCell`` over the created cells.
            '''
        layers = []
        for _ in range(depth):
            layers.append(self.build_single_cell(hidden_units))
        return MultiRNNCell(layers)
    def _create_decoder_cell(self):
        """Build the attention decoder cell and its initial state.

        With ``self.cfg.top_attention`` only the last layer is wrapped in an
        ``AttentionWrapper``; otherwise the whole ``MultiRNNCell`` is
        wrapped and its zero state is cloned with the encoder states as the
        cell state.

        Returns:
            ``(cells, dec_init_states)``.
        """
        enc_outputs = self.enc_outputs
        enc_states = self.enc_states
        enc_seq_len = self.enc_seq_len

        # Zero states are sized for batch * beam under beam search.
        if self.use_beam_search:
            batch_size = self.batch_size * self.cfg.beam_size
        else:
            batch_size = self.batch_size

        with tf.variable_scope("attention"):
            # Luong when configured, Bahdanau for any other setting.
            if self.cfg.attention == "luong":
                mechanism_cls = LuongAttention
            else:
                mechanism_cls = BahdanauAttention
            attention_mechanism = mechanism_cls(
                num_units=self.cfg.num_units,
                memory=enc_outputs,
                memory_sequence_length=enc_seq_len)

        def cell_input_fn(inputs, attention):
            # Cell input function that keeps input/output dimension the same.
            # reference: https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/AttentionWrapper
            if self.cfg.use_attention_input_feeding:
                input_project = tf.layers.Dense(self.cfg.num_units,
                                                dtype=tf.float32,
                                                name='attn_input_feeding')
                return input_project(tf.concat([inputs, attention], axis=-1))
            return inputs

        if not self.cfg.top_attention:
            # Attention around the whole stack.
            stacked = MultiRNNCell(
                [self._create_rnn_cell() for _ in range(self.cfg.num_layers)])
            wrapped = AttentionWrapper(stacked,
                                       attention_mechanism=attention_mechanism,
                                       name="Attention_Wrapper",
                                       attention_layer_size=self.cfg.num_units,
                                       initial_cell_state=enc_states,
                                       cell_input_fn=cell_input_fn)
            zero_state = wrapped.zero_state(batch_size=batch_size,
                                            dtype=tf.float32)
            return wrapped, zero_state.clone(cell_state=enc_states)

        # Attention on the top decoder layer only.
        layers = [self._create_rnn_cell() for _ in range(self.cfg.num_layers)]
        layers[-1] = AttentionWrapper(
            layers[-1],
            attention_mechanism=attention_mechanism,
            name="Attention_Wrapper",
            attention_layer_size=self.cfg.num_units,
            initial_cell_state=enc_states[-1],
            cell_input_fn=cell_input_fn)
        layer_states = list(enc_states)
        layer_states[-1] = layers[-1].zero_state(batch_size=batch_size,
                                                 dtype=tf.float32)
        dec_init_states = tuple(layer_states)
        return MultiRNNCell(layers), dec_init_states
Exemple #20
0
    def build_decoder_cell(self):
        """Build the attention decoder cell and its initial state.

        Creates ``self.layer_num`` cells with ``self.build_single_cell`` and
        wraps only the last one in an ``AttentionWrapper``.

        Returns:
            ``(cell, decoder_initial_state)``: a ``MultiRNNCell`` over
            ``self.decoder_cell_list`` and a tuple of per-layer states whose
            last entry is the attention wrapper's zero state.
        """
        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length
        # building attention mechanism: default Bahdanau
        # 'Bahdanau': https://arxiv.org/abs/1409.0473
        # NOTE(review): the Bahdanau mechanism is constructed even when it is
        # replaced by Luong just below - confirm whether that is intended.
        self.attention_mechanism = attention_wrapper.BahdanauAttention(
            num_units=self.hidden_size,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)
        # 'Luong': https://arxiv.org/abs/1508.04025
        if self.attention_type.lower() == 'luong':
            # Reads the same attributes as the locals used above.
            self.attention_mechanism = attention_wrapper.LuongAttention(
                num_units=self.hidden_size,
                memory=self.encoder_outputs,
                memory_sequence_length=self.encoder_inputs_length)

        # building decoder_cell
        self.decoder_cell_list = [
            self.build_single_cell() for _ in range(self.layer_num)
        ]

        def att_decoder_input_fn(inputs, attention):
            # Passed as ``cell_input_fn``: returns the inputs untouched unless
            # ``self.use_att_decoding`` is set, otherwise projects the
            # concatenated inputs and attention to ``self.hidden_size``.
            if not self.use_att_decoding:
                return inputs

            _input_layer = Dense(self.hidden_size,
                                 dtype=self.dtype,
                                 name='att_input_feeding')
            return _input_layer(array_ops.concat([inputs, attention], axis=-1))

        # AttentionWrapper wraps RNNCell with the attention_mechanism
        # implement attention mechanism only on the top of decoder layer
        self.decoder_cell_list[-1] = attention_wrapper.AttentionWrapper(
            cell=self.decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_size,
            cell_input_fn=att_decoder_input_fn,
            initial_cell_state=encoder_last_state[
                -1],  # last hidden state of last encode layer
            alignment_history=False,
            name='Attention_Wrapper')
        # Copy the encoder states, then replace the last entry with the
        # wrapper's zero state.
        initial_state = [state for state in encoder_last_state]
        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
            batch_size=self.batch_size, dtype=self.dtype)
        decoder_initial_state = tuple(initial_state)
        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state
Exemple #21
0
 def gru_net_initial(self,
                     hidden_units,
                     input_data,
                     initial_state,
                     input_length,
                     depth=1):
     """Run a stacked cell over ``input_data`` from a given initial state.

     Args:
         hidden_units: Units passed to ``self.build_single_cell``.
         input_data: Inputs for ``tf.nn.dynamic_rnn``.
         initial_state: Initial state for the stacked cell.
         input_length: Sequence lengths; reshaped to one dimension.
         depth: Number of stacked cells.

     Returns:
         The outputs of ``tf.nn.dynamic_rnn``; the final state is dropped.
     """
     layers = []
     for _ in range(depth):
         layers.append(self.build_single_cell(hidden_units))
     stacked = MultiRNNCell(layers, state_is_tuple=False)

     flat_lengths = tf.reshape(input_length, [-1])
     rnn_outputs, _ = tf.nn.dynamic_rnn(stacked,
                                        input_data,
                                        sequence_length=flat_lengths,
                                        initial_state=initial_state,
                                        dtype=tf.float32)
     return rnn_outputs
Exemple #22
0
def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0,
                    residual_connections=False):
    """Build a single RNN cell or a ``MultiRNNCell`` stack.

    TODO: move this to utils.

    :param cell_type: ``"lstm"`` or ``"gru"``.
    :param num_units: Units of every layer.
    :param num_layers: Number of layers; more than 1 yields a
        ``MultiRNNCell``.
    :param dp_input_keep_prob: Input keep probability; a ``DropoutWrapper``
        is added when this or ``dp_output_keep_prob`` differs from 1.0.
    :param dp_output_keep_prob: Output keep probability.
    :param residual_connections: Wrap each cell in a ``ResidualWrapper``
        (inside the dropout wrapper, when both apply).
    :return: The cell, or the ``MultiRNNCell`` over the per-layer cells.
    :raises ValueError: If ``cell_type`` is not supported.
    """
    def single_cell(num_units):
        if cell_type == "lstm":
            cell_class = LSTMCell
        elif cell_type == "gru":
            cell_class = GRUCell
        else:
            # Without this branch an unknown type surfaced as an
            # UnboundLocalError on ``cell_class`` further down.
            raise ValueError('Cell Type Not Supported! ')

        cell = cell_class(num_units=num_units)
        if residual_connections:
            cell = ResidualWrapper(cell)
        if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
            cell = DropoutWrapper(cell,
                                  input_keep_prob=dp_input_keep_prob,
                                  output_keep_prob=dp_output_keep_prob)
        return cell

    if num_layers > 1:
        return MultiRNNCell(
            [single_cell(num_units) for _ in range(num_layers)])
    return single_cell(num_units)
Exemple #23
0
class LSTMOptModel(nn_opt.BasicNNOptModel):
    """Optimizer model that maps encoded gradients to updates via stacked LSTMs."""

    def lstm_cell(self):
        """Return a new ``LSTMCell`` with ``self.dimH`` units."""
        return LSTMCell(num_units=self.dimH)

    def _build_pre(self, size):
        """Store hyper-parameters and create the stacked cell ``self.cellH``.

        ``size`` becomes the number of units of every LSTM layer.
        """
        self.lr = 0.1
        self.dimH = size
        self.num_of_layers = 2

        layers = []
        for _ in range(self.num_of_layers):
            layers.append(self.lstm_cell())
        self.cellH = MultiRNNCell(layers)

    def _build_input(self):
        """Create placeholders (via ``self.ph``) for ``x`` and the cell state."""
        self.x = self.ph([None])

        # One (c, h) placeholder pair per entry of the cell's state_size.
        layer_states = []
        for layer_size in self.cellH.state_size:
            c_ph = self.ph([None, layer_size.c])
            h_ph = self.ph([None, layer_size.h])
            layer_states.append((c_ph, h_ph))
        self.cellH_state = tuple(layer_states)

        self.input_state = [self.x, self.cellH_state]

    def _build_initial(self):
        """Set ``self.initial_state`` to ``x`` plus a zero cell state."""
        weights = self.x  # weights of optimizee
        # tf.size(x) is used as the batch size of the zero state.
        zero_state = self.cellH.zero_state(tf.size(weights), tf.float32)
        self.initial_state = [weights, zero_state]

    def iter(self, f, i, state):
        """Run one update step.

        ``state`` is ``[x, cellH_state]``; returns ``([x, cellH_state], fx)``
        with both state entries updated.
        """
        params, rnn_state = state

        fx, raw_grad = self._get_fx(f, i, params)
        self.optimizee_grad.append(raw_grad)

        # The recorded gradient is kept as-is; only the copy fed to the
        # cell is wrapped in stop_gradient.
        encoded = self._deepmind_log_encode(tf.stop_gradient(raw_grad))

        with tf.variable_scope("cellH"):
            hidden, rnn_state = self.cellH(encoded, rnn_state)

        with tf.variable_scope("fc"):
            projected = self.fc(hidden, 1)

        step = projected[:, 0] * self.lr
        params += step
        return [params, rnn_state], fx
Exemple #24
0
    def build_graph(self):
        """Build the stacked-LSTM graph and return its output layer.

        Runs ``self.model_input`` and ``self.init_state`` through a
        ``MultiRNNCell`` of ``self.layers`` LSTM layers, each with
        ``self.layer_size`` units, and overwrites ``self.init_state`` with
        the state returned by the cell. The cell output is passed to the
        ``fc_out`` output layer, which is stored in ``self.output_layer``
        wrapped in a ``tf.Print`` op and recorded as a histogram summary.

        :return: ``self.output_layer``.
        """
        with tf.variable_scope('lstm'):
            # Create one LSTMCell object per layer. ``[cell] * n`` would put
            # n references to the same object in the stack, which newer
            # TensorFlow releases treat as parameter sharing between layers
            # (or reject outright).
            rnn_cell = MultiRNNCell(
                [LSTMCell(self.layer_size) for _ in range(self.layers)])
            cell_output, self.init_state = rnn_cell(self.model_input,
                                                    self.init_state)
            print("%i layers created" % self.layers)
            self.output_layer = self.__add_output_layer(
                "fc_out", cell_output, self.layer_size, self.output_dim)

            # Debug aid: prints the output layer and ground truth (up to 6
            # entries each) whenever the op is evaluated.
            self.output_layer = tf.Print(
                self.output_layer,
                [self.output_layer,
                 tf.convert_to_tensor(self.ground_truth)],
                'Value of output layer and ground truth:',
                summarize=6)

            tf.histogram_summary('lstm_output', self.output_layer)

            return self.output_layer
Exemple #25
0
 def _create_rnn_cell(self):
     """Create the RNN cell(s) described by ``self.cfg``.

     * ``num_layers`` is ``None`` or <= 1: a single cell from
       ``_create_single_rnn_cell``.
     * ``use_stack_rnn`` is truthy: a plain Python list with one
       dropout-wrapped ``LSTMCell`` per layer (not a ``MultiRNNCell``).
     * otherwise: a ``MultiRNNCell`` of ``num_layers`` single cells.
     """
     layer_count = self.cfg["num_layers"]
     if layer_count is None or layer_count <= 1:
         return self._create_single_rnn_cell(self.cfg["num_units"])

     if not self.cfg["use_stack_rnn"]:
         return MultiRNNCell([
             self._create_single_rnn_cell(self.cfg["num_units"])
             for _ in range(layer_count)
         ])

     def wrapped_lstm():
         # Orthogonally initialised LSTM with dropout on inputs and state.
         lstm = tf.nn.rnn_cell.LSTMCell(
             self.cfg["num_units"],
             initializer=tf.initializers.orthogonal)
         return tf.contrib.rnn.DropoutWrapper(
             lstm,
             state_keep_prob=self.keep_prob,
             input_keep_prob=self.keep_prob,
             dtype=tf.float32)

     return [wrapped_lstm() for _ in range(layer_count)]
Exemple #26
0
    def build_decoder_cell(self):
        """Build the attention decoder cell and its initial state.

        Stacks ``self.config.decoder_depth`` LSTM layers of
        ``self.config.hidden_units`` units and wraps the last layer in an
        ``AttentionWrapper`` using Bahdanau attention over
        ``self.encoder_outputs``.

        :return: ``(cell, initial_state)`` where ``cell`` is a
            ``MultiRNNCell`` and ``initial_state`` is a tuple copied from
            ``self.encoder_last_state`` with its last entry replaced by the
            attention wrapper's zero state.
        """
        # No beam search currently

        # Attention
        # TODO: other attention mechanism?
        attention_mechanism = BahdanauAttention(
            num_units=self.config.hidden_units,
            memory=self.encoder_outputs,
            memory_sequence_length=self.encoder_inputs_length)

        # One LSTMCell object per layer. ``[cell] * depth`` would make every
        # layer (including the one wrapped by AttentionWrapper below) refer
        # to the same cell object instead of having its own.
        decoder_cells = [
            LSTMCell(self.config.hidden_units)
            for _ in range(self.config.decoder_depth)
        ]
        decoder_initial_state = list(self.encoder_last_state)

        def attn_decoder_input_fn(inputs, attention):
            # Without input feeding the attention vector is ignored.
            if not self.config.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            _input_layer = Dense(self.config.hidden_units,
                                 dtype=tf.float32,
                                 name='attn_input_feeding')
            return _input_layer(concat([inputs, attention], -1))

        # Add an AttentionWrapper around the last layer of the decoder
        decoder_cells[-1] = AttentionWrapper(
            cell=decoder_cells[-1],
            attention_mechanism=attention_mechanism,
            attention_layer_size=self.config.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=decoder_initial_state[-1],
            alignment_history=False,
            name='Attention_Wrapper')

        # The wrapped layer's state comes from the wrapper's zero_state; the
        # encoder's last-layer state was handed to it as initial_cell_state.
        decoder_initial_state[-1] = decoder_cells[-1].zero_state(
            batch_size=self.batch_size, dtype=tf.float32)
        decoder_initial_state = tuple(decoder_initial_state)

        return MultiRNNCell(decoder_cells), decoder_initial_state
Exemple #27
0
    def build_decoder_cell(self):
        """Build the stacked decoder cell and its initial state.

        Creates ``self.depth`` cells with ``build_single_cell`` and stores
        them in ``self.decoder_cell_list``. When
        ``self.use_beamsearch_decode`` is set, the encoder's last state is
        tiled ``self.beam_width`` times and the batch size used for the zero
        state is multiplied by ``self.beam_width``.

        :return: ``(cell, initial_state)`` where ``cell`` is a
            ``MultiRNNCell`` over ``self.decoder_cell_list`` and
            ``initial_state`` is a tuple of the encoder's last state with
            its last entry replaced by the last cell's zero state.
        """
        encoder_last_state = self.encoder_last_state

        # To use BeamSearchDecoder the state must be tiled so that:
        # [batch_size, .., ..] -> [batch_size x beam_width, .., ..].
        # Only the encoder state is tiled here; nothing in this method
        # consumes the encoder outputs or input lengths.
        if self.use_beamsearch_decode:
            print("use beamsearch decoding..")
            encoder_last_state = nest.map_structure(
                lambda s: seq2seq.tile_batch(s, self.beam_width),
                self.encoder_last_state)

        # Building decoder_cell
        self.decoder_cell_list = [
            self.build_single_cell() for _ in range(self.depth)
        ]
        # ADD GPU SUPPORT FOR DISTRIBUTION

        # With beam search, the batch_size argument of .zero_state must be
        # beam_width times the original batch_size.
        batch_size = self.batch_size if not self.use_beamsearch_decode \
                     else self.batch_size * self.beam_width

        initial_state = list(encoder_last_state)
        # NOTE(review): this discards the encoder's last-layer state in
        # favour of zeros; behaviour kept as-is, confirm it is intended.
        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
            batch_size=batch_size, dtype=self.dtype)

        return MultiRNNCell(self.decoder_cell_list), tuple(initial_state)
Exemple #28
0
class LSTMOptModel(nn_opt.BasicNNOptModel):
    """Optimizer model that maps encoded gradients to updates via stacked LSTMs."""

    def _build_pre(self):
        """Set hyper-parameters and create the two-layer cell ``self.cellH``."""
        self.dimH = 20
        # One LSTMCell object per layer: ``[cell] * 2`` would put the same
        # cell object in both layers instead of giving each layer its own.
        self.cellH = MultiRNNCell([LSTMCell(self.dimH) for _ in range(2)])
        self.lr = 0.1

    def _build_input(self):
        """Create placeholders (via ``self.ph``) for ``x`` and the cell state."""
        self.x = self.ph([None])
        # One (c, h) placeholder pair per entry of the cell's state_size.
        self.cellH_state = tuple(
            (self.ph([None, size.c]), self.ph([None, size.h]))
            for size in self.cellH.state_size)
        self.input_state = [self.x, self.cellH_state]

    def _build_initial(self):
        """Set ``self.initial_state`` to ``x`` plus a zero cell state."""
        x = self.x
        # tf.size(x) is used as the batch size of the zero state.
        cellH_state = self.cellH.zero_state(tf.size(x), tf.float32)
        self.initial_state = [x, cellH_state]

    def _iter(self, f, i, state):
        """Run one update step.

        ``state`` is ``[x, cellH_state]``; returns ``([x, cellH_state], fx)``
        with both state entries updated.
        """
        x, cellH_state = state

        fx, grad = self._get_fx(f, i, x)
        # Block back-propagation through the gradient fed to the cell.
        grad = tf.stop_gradient(grad)

        last = self._deepmind_log_encode(grad)

        with tf.variable_scope("cellH"):
            last, cellH_state = self.cellH(last, cellH_state)

        with tf.variable_scope("fc"):
            last = self.fc(last, 1)

        # First column of the fc output, scaled by the learning rate.
        delta_x = last[:, 0] * self.lr

        x += delta_x
        return [x, cellH_state], fx
Exemple #29
0
def create_cudnn_LSTM_cell(num_units,
                           input_size,
                           num_layers=1,
                           dp_input_keep_prob=1.0,
                           dp_output_keep_prob=1.0):
    """Build a single or stacked unidirectional ``cudnn_LSTMCell``.

    Each layer is created inside its own variable scope (``layer_0``,
    ``layer_1``, ...). A layer is wrapped in ``DropoutWrapper`` when either
    keep probability differs from 1.0. With ``num_layers`` greater than 1
    the layers are combined in a ``MultiRNNCell``; otherwise the single
    ``layer_0`` cell is returned.
    """
    def single_cell(name):
        with tf.variable_scope(name):
            wants_dropout = (dp_input_keep_prob != 1.0
                             or dp_output_keep_prob != 1.0)
            base_cell = cudnn_LSTMCell(num_units=num_units,
                                       input_size=input_size,
                                       direction='unidirectional')
            if not wants_dropout:
                return base_cell
            return DropoutWrapper(base_cell,
                                  input_keep_prob=dp_input_keep_prob,
                                  output_keep_prob=dp_output_keep_prob)

    if num_layers > 1:
        stacked = []
        for layer_idx in range(num_layers):
            stacked.append(single_cell('layer_%d' % layer_idx))
        return MultiRNNCell(stacked)

    return single_cell('layer_0')
    def build_encoder_cell(self):
        """Return a ``MultiRNNCell`` of ``self.depth`` cells from ``build_single_cell``."""
        layers = []
        for _ in range(self.depth):
            layers.append(self.build_single_cell())
        return MultiRNNCell(layers)