def _build_pre(self):
    self.dimA = 20
    # one LSTMCell instance per layer; [LSTMCell(self.dimA)] * 2 would
    # repeat a single instance and share weights across both layers
    self.cellA = MultiRNNCell([LSTMCell(self.dimA) for _ in range(2)])
    self.b1 = 0.95
    self.b2 = 0.95
    self.lr = 0.1
    self.eps = 1e-8

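# Why the snippets here build layer lists with comprehensions rather than
# [cell] * n: list repetition copies *references*, so every layer would be
# the same cell object and TensorFlow would share (or refuse to build) the
# per-layer weights. A minimal illustration; the names below (`n_layers`,
# `dim`) are for demonstration only and not part of the snippets.
from tensorflow.contrib.rnn import LSTMCell

n_layers, dim = 2, 20
shared = [LSTMCell(dim)] * n_layers                   # n references to ONE cell
distinct = [LSTMCell(dim) for _ in range(n_layers)]   # n independent cells
assert shared[0] is shared[1]
assert distinct[0] is not distinct[1]
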
def impress(self, state_code, pre_impress_states):
    # LSTM, 3 layers
    self.impress_lay_num = 3
    with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):

        def loop_fn(time, cell_output, cell_state, loop_state):
            if cell_output is None:  # time = 0: initialization
                input = state_code
                state = state_
                emit_output = None
                loop_state = None
            else:
                input = cell_output
                emit_output = cell_output
                state = cell_state
                loop_state = None
            elements_finished = (time >= 1)
            return (elements_finished, input, state, emit_output, loop_state)

        multirnn_cell = MultiRNNCell(
            [LSTMCell(self.impress_dim) for _ in range(self.impress_lay_num)],
            state_is_tuple=True)
        if pre_impress_states is None:
            state_ = multirnn_cell.zero_state(self.batch_size, tf.float32)
        else:
            state_ = pre_impress_states
        emit_ta, states, final_loop_state = tf.nn.raw_rnn(multirnn_cell, loop_fn)
        # transpose to put the batch dimension first
        state_impress_code = tf.transpose(emit_ta.stack(), [1, 0, 2])[0]
    # return the final cell state so it can be fed back in as
    # pre_impress_states on the next call (loop_state is always None here)
    return state_impress_code, states

def _build_pre(self, size):
    self.dimA = size
    self.num_of_layers = 2
    self.cellA = MultiRNNCell(
        [LSTMCell(num_units=self.dimA) for _ in range(self.num_of_layers)])
    self.b1 = 0.95
    self.b2 = 0.95
    self.lr = 0.1
    self.eps = 1e-8

def _build_decoder_cell(self):
    # no beam
    encoder_outputs = self.encoder_outputs
    encoder_last_state = self.encoder_last_state
    encoder_inputs_length = self.encoder_inputs_length

    def attn_decoder_input_fn(inputs, attention):
        if not self.attn_input_feeding:
            return inputs
        _input_layer = Dense(self.hidden_units, dtype=self.dtype,
                             name="attn_input_feeding")
        return _input_layer(array_ops.concat([inputs, attention], -1))

    # attention mechanism 'luong'
    with tf.variable_scope('shared_attention_mechanism'):
        self.attention_mechanism = attention_wrapper.LuongAttention(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)

    # build decoder cell
    self.init_decoder_cell_list = [
        self._build_single_cell() for i in range(self.depth)
    ]
    self.decoder_cell_list = self.init_decoder_cell_list[:-1] + [
        attention_wrapper.AttentionWrapper(
            cell=self.init_decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],
            alignment_history=False)
    ]
    batch_size = self.batch_size
    initial_state = [state for state in encoder_last_state]
    initial_state[-1] = self.decoder_cell_list[-1].zero_state(
        batch_size=batch_size, dtype=self.dtype)
    decoder_initial_state = tuple(initial_state)

    # beam
    beam_encoder_outputs = seq2seq.tile_batch(
        self.encoder_outputs, multiplier=self.beam_width)
    beam_encoder_last_state = nest.map_structure(
        lambda s: seq2seq.tile_batch(s, self.beam_width),
        self.encoder_last_state)
    beam_encoder_inputs_length = seq2seq.tile_batch(
        self.encoder_inputs_length, multiplier=self.beam_width)

    # reuse the attention variables for the beam-search decoder
    with tf.variable_scope('shared_attention_mechanism', reuse=True):
        self.beam_attention_mechanism = attention_wrapper.LuongAttention(
            num_units=self.hidden_units,
            memory=beam_encoder_outputs,
            memory_sequence_length=beam_encoder_inputs_length)

    self.beam_decoder_cell_list = self.init_decoder_cell_list[:-1] + [
        attention_wrapper.AttentionWrapper(
            cell=self.init_decoder_cell_list[-1],
            attention_mechanism=self.beam_attention_mechanism,
            attention_layer_size=self.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=beam_encoder_last_state[-1],
            alignment_history=False)
    ]
    beam_batch_size = self.batch_size * self.beam_width
    beam_initial_state = [state for state in beam_encoder_last_state]
    beam_initial_state[-1] = self.beam_decoder_cell_list[-1].zero_state(
        batch_size=beam_batch_size, dtype=self.dtype)
    beam_decoder_initial_state = tuple(beam_initial_state)

    return (MultiRNNCell(self.decoder_cell_list), decoder_initial_state,
            MultiRNNCell(self.beam_decoder_cell_list),
            beam_decoder_initial_state)

def createGraph(self):
    self.input = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                name='inputs')
    self.targs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                name='targets')
    onehot = tf.one_hot(self.input, self.vocab_size, name='input_oh')

    inputs = tf.split(onehot, self.seq_len, 1)
    inputs = [tf.squeeze(i, [1]) for i in inputs]
    targets = tf.split(self.targs, self.seq_len, 1)

    with tf.variable_scope("posRNN"):
        cells = [GRUCell(self.num_hidden) for _ in range(self.num_layers)]
        stacked = MultiRNNCell(cells, state_is_tuple=True)
        self.zero_state = stacked.zero_state(self.batch_size, tf.float32)
        outputs, self.last_state = seq2seq.rnn_decoder(inputs,
                                                       self.zero_state,
                                                       stacked)
        w = tf.get_variable(
            "w", [self.num_hidden, self.vocab_size], tf.float32,
            initializer=tf.random_normal_initializer(stddev=0.02))
        b = tf.get_variable("b", [self.vocab_size],
                            initializer=tf.constant_initializer(0.0))
        logits = [tf.matmul(o, w) + b for o in outputs]
        const_weights = [
            tf.ones([self.batch_size]) for _ in range(self.seq_len)
        ]
        self.loss = seq2seq.sequence_loss(logits, targets, const_weights)

    self.opt = tf.train.AdamOptimizer(0.001, beta1=0.5).minimize(self.loss)

    # sampling path: reuse the trained variables with batch_size = 1
    with tf.variable_scope("posRNN", reuse=True):
        batch_size = 1
        self.s_inputs = tf.placeholder(tf.int32, [batch_size],
                                       name='s_inputs')
        s_onehot = tf.one_hot(self.s_inputs, self.vocab_size,
                              name='s_input_oh')
        self.s_zero_state = stacked.zero_state(batch_size, tf.float32)
        s_outputs, self.s_last_state = seq2seq.rnn_decoder([s_onehot],
                                                           self.s_zero_state,
                                                           stacked)
        s_outputs = tf.reshape(s_outputs, [1, self.num_hidden])
        self.s_probs = tf.nn.softmax(tf.matmul(s_outputs, w) + b)

def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0,
                    activation=None):

    def single_cell(num_units):
        if cell_type == 'rnn':
            cell_class = BasicRNNCell
        elif cell_type == 'lstm':
            cell_class = LSTMCell
        elif cell_type == 'gru':
            cell_class = GRUCell
        else:
            raise ValueError('Cell Type Not Supported!')

        if activation is not None:
            if activation == 'relu':
                activation_f = tf.nn.relu
            elif activation == 'sigmoid':
                activation_f = tf.sigmoid
            elif activation == 'elu':
                activation_f = tf.nn.elu
            else:
                raise ValueError('Activation Function Not Supported!')
        else:
            activation_f = None

        if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
            return DropoutWrapper(
                cell_class(num_units=num_units, activation=activation_f),
                input_keep_prob=dp_input_keep_prob,
                output_keep_prob=dp_output_keep_prob)
        else:
            # pass activation_f here too, so the requested activation is
            # not silently dropped when dropout is disabled
            return cell_class(num_units=num_units, activation=activation_f)

    if isinstance(num_units, list):
        num_layers = len(num_units)
        if num_layers > 1:
            return MultiRNNCell(
                [single_cell(num_units[i]) for i in range(num_layers)])
        else:
            return single_cell(num_units[0])
    else:
        if num_layers > 1:
            return MultiRNNCell(
                [single_cell(num_units) for _ in range(num_layers)])
        else:
            return single_cell(num_units)

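# A hypothetical usage sketch for create_rnn_cell above (the import path and
# the placeholder shapes are assumptions, not part of the original snippet):
# build a 2-layer GRU stack with output dropout and unroll it with
# tf.nn.dynamic_rnn.
import tensorflow as tf
from tensorflow.contrib.rnn import (BasicRNNCell, GRUCell, LSTMCell,
                                    DropoutWrapper, MultiRNNCell)

cell = create_rnn_cell('gru', num_units=[128, 128], dp_output_keep_prob=0.8)
inputs = tf.placeholder(tf.float32, [None, 50, 32])   # [batch, time, features]
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
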
class RNNpropModel(nn_opt.BasicNNOptModel):

    def _build_pre(self):
        self.dimA = 20
        # separate LSTMCell instances per layer; [LSTMCell(self.dimA)] * 2
        # would share weights between the two layers
        self.cellA = MultiRNNCell([LSTMCell(self.dimA) for _ in range(2)])
        self.b1 = 0.95
        self.b2 = 0.95
        self.lr = 0.1
        self.eps = 1e-8

    def _build_input(self):
        self.x = self.ph([None])
        self.m = self.ph([None])
        self.v = self.ph([None])
        self.b1t = self.ph([])
        self.b2t = self.ph([])
        self.sid = self.ph([])
        self.cellA_state = tuple(
            (self.ph([None, size.c]), self.ph([None, size.h]))
            for size in self.cellA.state_size)
        self.input_state = [
            self.sid, self.b1t, self.b2t, self.x, self.m, self.v,
            self.cellA_state
        ]

    def _build_initial(self):
        x = self.x
        m = tf.zeros(shape=tf.shape(x))
        v = tf.zeros(shape=tf.shape(x))
        b1t = tf.ones([])
        b2t = tf.ones([])
        cellA_state = self.cellA.zero_state(tf.size(x), tf.float32)
        self.initial_state = [tf.zeros([]), b1t, b2t, x, m, v, cellA_state]

    # return state, fx
    def _iter(self, f, i, state):
        sid, b1t, b2t, x, m, v, cellA_state = state

        fx, grad = self._get_fx(f, i, x)
        grad = tf.stop_gradient(grad)

        # Adam-style first and second moment estimates of the gradient
        m = self.b1 * m + (1 - self.b1) * grad
        v = self.b2 * v + (1 - self.b2) * (grad ** 2)
        b1t *= self.b1
        b2t *= self.b2
        sv = tf.sqrt(v / (1 - b2t)) + self.eps

        last = tf.stack([grad / sv, (m / (1 - b1t)) / sv], 1)
        last = tf.nn.elu(self.fc(last, 20))
        with tf.variable_scope("cellA"):
            lastA, cellA_state = self.cellA(last, cellA_state)
        with tf.variable_scope("fc_A"):
            a = self.fc(lastA, 1)[:, 0]
        a = tf.tanh(a) * self.lr
        x -= a

        return [sid + 1, b1t, b2t, x, m, v, cellA_state], fx

def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0):

    def single_cell(num_units):
        if cell_type == 'rnn':
            cell_class = BasicRNNCell
        elif cell_type == 'gru':
            cell_class = GRUCell
        elif cell_type == 'lstm':
            cell_class = LSTMCell
        else:
            raise ValueError('Cell Type Not Supported!')

        if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
            return DropoutWrapper(cell_class(num_units=num_units),
                                  input_keep_prob=dp_input_keep_prob,
                                  output_keep_prob=dp_output_keep_prob)
        else:
            return cell_class(num_units=num_units)

    # num_units is expected to be a list with one entry per layer
    assert len(num_units) == num_layers
    if num_layers > 1:
        return MultiRNNCell(
            [single_cell(num_units[i]) for i in range(num_layers)])
    else:
        return single_cell(num_units[0])

def Encoder(self, xs):
    encoder_input = tf.one_hot(tf.cast(xs, tf.int32), self.val_size_x)
    encoder_input = self.WordEmb(encoder_input)
    if self.args.train:
        inputs_length = self.inputs_length_PH
    elif self.args.test:
        inputs_length = self.inputs_length_test_PH

    # each direction gets its own cell stack; passing one cell object as
    # both cell_fw and cell_bw would tie the two directions' weights
    def make_cell():
        return MultiRNNCell(
            [LSTMCell(self.encoder_units)
             for _ in range(self.encoder_lay_Num)],
            state_is_tuple=True)

    (fw_outputs, bw_outputs), (fw_final_state, bw_final_state) = (
        tf.nn.bidirectional_dynamic_rnn(cell_fw=make_cell(),
                                        cell_bw=make_cell(),
                                        inputs=encoder_input,
                                        sequence_length=inputs_length,
                                        dtype=self.dtype))
    sentence_code = tf.concat((fw_outputs, bw_outputs), axis=2)

    # pick the output at the last valid time step of each sequence
    sentence_code_ = []
    for i in range(self.batch_size):
        sentence_code_.append(sentence_code[i, inputs_length[i] - 1, :])
    encoder_output = tf.stack(sentence_code_)
    encoder_output = tf.layers.dense(inputs=encoder_output,
                                     units=self.encoder_units,
                                     activation=tf.nn.relu)
    return encoder_output

def build_decoder_cell(self):
    encoder_outputs = self.encoder_outputs
    encoder_last_state = self.encoder_last_state
    encoder_inputs_length = self.encoder_inputs_length

    if self.use_beamsearch_decode:
        print("use beamsearch decoding..")
        encoder_outputs = seq2seq.tile_batch(
            self.encoder_outputs, multiplier=self.beam_width)
        encoder_last_state = nest.map_structure(
            lambda s: seq2seq.tile_batch(s, self.beam_width),
            self.encoder_last_state)
        encoder_inputs_length = seq2seq.tile_batch(
            self.encoder_inputs_length, multiplier=self.beam_width)

    # Building attention mechanism: default is Bahdanau
    # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
    self.attention_mechanism = attention_wrapper.BahdanauAttention(
        num_units=self.hidden_units,
        memory=encoder_outputs,
        memory_sequence_length=encoder_inputs_length)
    # 'Luong' style attention: https://arxiv.org/abs/1508.04025
    if self.attention_type.lower() == 'luong':
        self.attention_mechanism = attention_wrapper.LuongAttention(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)

    # Building decoder_cell
    self.decoder_cell_list = [
        self.build_single_cell() for i in range(self.depth)
    ]

    def attn_decoder_input_fn(inputs, attention):
        if not self.attn_input_feeding:
            return inputs
        # Essential when use_residual=True
        _input_layer = Dense(self.hidden_units, dtype=self.dtype,
                             name='attn_input_feeding')
        return _input_layer(array_ops.concat([inputs, attention], -1))

    # AttentionWrapper wraps RNNCell with the attention_mechanism
    # Note: We implement the attention mechanism only on the top decoder layer
    self.decoder_cell_list[-1] = attention_wrapper.AttentionWrapper(
        cell=self.decoder_cell_list[-1],
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.hidden_units,
        cell_input_fn=attn_decoder_input_fn,
        initial_cell_state=encoder_last_state[-1],
        alignment_history=False,
        name='Attention_Wrapper')

    # with beam search, zero_state needs batch_size * beam_width
    batch_size = self.batch_size if not self.use_beamsearch_decode \
        else self.batch_size * self.beam_width
    initial_state = [state for state in encoder_last_state]
    initial_state[-1] = self.decoder_cell_list[-1].zero_state(
        batch_size=batch_size, dtype=self.dtype)
    decoder_initial_state = tuple(initial_state)

    return MultiRNNCell(self.decoder_cell_list), decoder_initial_state

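# A hypothetical sketch (embedding, length and vocabulary names are
# assumptions) of how the cell/state pair returned by build_decoder_cell is
# consumed by the tf.contrib.seq2seq training-time decoding API:
decoder_cell, decoder_initial_state = self.build_decoder_cell()
helper = tf.contrib.seq2seq.TrainingHelper(
    inputs=decoder_embedded_inputs,              # [batch, time, emb]
    sequence_length=self.decoder_inputs_length)
decoder = tf.contrib.seq2seq.BasicDecoder(
    cell=decoder_cell,
    helper=helper,
    initial_state=decoder_initial_state,
    output_layer=Dense(self.num_decoder_symbols))
outputs, final_state, final_seq_len = tf.contrib.seq2seq.dynamic_decode(decoder)
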
def _create_rnn_cell(self):
    if self.cfg["num_layers"] is None or self.cfg["num_layers"] <= 1:
        return self._create_single_rnn_cell(self.cfg["num_units"])
    else:
        # the multi-layer branch must also return its cell
        return MultiRNNCell([
            self._create_single_rnn_cell(self.cfg["num_units"])
            for _ in range(self.cfg["num_layers"])
        ])

def Decoder(self, encoder_output):

    def loop_fn(time, cell_output, cell_state, loop_state):
        if cell_output is None:  # time = 0: initialization
            input = tf.concat((encoder_output, encoder_output), axis=1)
            state = multirnn_cell.zero_state(self.batch_size, tf.float32)
            emit_output = None
            loop_state = None
            elements_finished = False
        else:
            emit_output = cell_output
            if self.args.test:
                # decoder_units -> val_size
                transformed_output = tf.nn.xw_plus_b(cell_output,
                                                     self.decoder_W,
                                                     self.decoder_b)
                # greedy argmax over the vocabulary
                transformed_output = tf.argmax(transformed_output, 1)
                transformed_output = tf.one_hot(transformed_output,
                                                self.val_size,
                                                on_value=1.0,
                                                off_value=0.0,
                                                axis=-1)
                # val_size -> decoder_units // 2
                transformed_output = self.WordEmb(transformed_output)
            elif self.args.train:
                # teacher forcing: feed the ground-truth previous token
                ys_onehot = tf.one_hot(self.ys_PH[:, time - 1],
                                       self.val_size,
                                       on_value=1.0,
                                       off_value=0.0,
                                       axis=-1)
                transformed_output = self.WordEmb(ys_onehot)
            input = tf.concat([transformed_output, encoder_output], axis=1)
            state = cell_state
            loop_state = None
            elements_finished = (time >= self.max_len)
        return (elements_finished, input, state, emit_output, loop_state)

    multirnn_cell = MultiRNNCell(
        [LSTMCell(self.decoder_units) for _ in range(self.lay_num)],
        state_is_tuple=True)
    emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(multirnn_cell,
                                                           loop_fn)
    # transpose to put the batch dimension first
    outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])

    # project decoder_units down to val_size
    decoder_output_flat = tf.reshape(outputs, [-1, self.decoder_units])
    decoder_output_transform_flat = tf.nn.xw_plus_b(decoder_output_flat,
                                                    self.decoder_W,
                                                    self.decoder_b)
    decoder_logits = tf.reshape(
        decoder_output_transform_flat,
        (self.batch_size, self.max_len, self.val_size))
    return decoder_logits

def build_dec_cell(self, hidden_size):
    enc_outputs = self.enc_outputs
    enc_last_state = self.enc_last_state
    enc_inputs_length = self.enc_inp_len

    if self.use_beam_search:
        self.logger.info("using beam search decoding")
        enc_outputs = seq2seq.tile_batch(self.enc_outputs,
                                         multiplier=self.p.beam_width)
        enc_last_state = nest.map_structure(
            lambda s: seq2seq.tile_batch(s, self.p.beam_width),
            self.enc_last_state)
        enc_inputs_length = seq2seq.tile_batch(self.enc_inp_len,
                                               self.p.beam_width)

    if self.p.attention_type.lower() == 'luong':
        self.attention_mechanism = attention_wrapper.LuongAttention(
            num_units=hidden_size,
            memory=enc_outputs,
            memory_sequence_length=enc_inputs_length)
    else:
        self.attention_mechanism = attention_wrapper.BahdanauAttention(
            num_units=hidden_size,
            memory=enc_outputs,
            memory_sequence_length=enc_inputs_length)

    def attn_dec_input_fn(inputs, attention):
        if not self.p.attn_input_feeding:
            return inputs
        else:
            _input_layer = Dense(hidden_size, dtype=self.p.dtype,
                                 name='attn_input_feeding')
            return _input_layer(tf.concat([inputs, attention], -1))

    self.dec_cell_list = [
        self.build_single_cell(hidden_size) for _ in range(self.p.depth)
    ]

    if self.p.use_attn:
        self.dec_cell_list[-1] = attention_wrapper.AttentionWrapper(
            cell=self.dec_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=hidden_size,
            cell_input_fn=attn_dec_input_fn,
            initial_cell_state=enc_last_state[-1],
            alignment_history=False,
            name='attention_wrapper')

    batch_size = self.p.batch_size if not self.use_beam_search \
        else self.p.batch_size * self.p.beam_width
    initial_state = [state for state in enc_last_state]
    if self.p.use_attn:
        initial_state[-1] = self.dec_cell_list[-1].zero_state(
            batch_size=batch_size, dtype=self.p.dtype)
    dec_initial_state = tuple(initial_state)

    return MultiRNNCell(self.dec_cell_list), dec_initial_state

def stacked_rnn_step(input_vocabulary_size,
                     hidden_size=13,
                     emb_dim=11,
                     n_layers=2,
                     variable_scope='encdec'):
    with tf.variable_scope(variable_scope, reuse=None):
        # stacked LSTM; build one LSTMCell per layer instead of repeating
        # a single instance with [cell] * n_layers
        rnn_cell = MultiRNNCell(
            [LSTMCell(hidden_size) for _ in range(n_layers)])
        proj_wrapper = InputProjectionWrapper(rnn_cell, emb_dim)
        embedding_wrapper = EmbeddingWrapper(proj_wrapper,
                                             input_vocabulary_size,
                                             emb_dim)
    return embedding_wrapper

def build_decoder_cell(rank, u_emb, batch_size, depth=2):
    cell = []
    for i in range(depth):
        if i == 0:
            cell.append(LSTMCell(rank, state_is_tuple=True))
        else:
            cell.append(ResidualWrapper(LSTMCell(rank, state_is_tuple=True)))
    # seed the first layer's state with the user embedding:
    # c = zeros, h = u_emb
    initial_state = [LSTMStateTuple(tf.zeros_like(u_emb), u_emb)]
    for i in range(1, depth):
        initial_state.append(cell[i].zero_state(batch_size, tf.float32))
    return MultiRNNCell(cell), tuple(initial_state)

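# A hypothetical usage sketch for build_decoder_cell above (the placeholder
# shapes and the name `u_emb` for the seeding embedding are assumptions):
# the returned cell/state pair plugs straight into tf.nn.dynamic_rnn.
rank, batch_size = 64, 32
u_emb = tf.placeholder(tf.float32, [batch_size, rank])       # seed embedding
dec_inputs = tf.placeholder(tf.float32, [batch_size, 10, rank])
dec_cell, dec_init_state = build_decoder_cell(rank, u_emb, batch_size)
dec_outputs, _ = tf.nn.dynamic_rnn(dec_cell, dec_inputs,
                                   initial_state=dec_init_state)
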
def model(data, weights, biases):
    # one LSTMCell instance per layer; [cell] * NUM_LAYERS would make all
    # layers share the same weights
    cell = MultiRNNCell([LSTMCell(NUM_NEURONS) for _ in range(NUM_LAYERS)])
    # static rnn over the input sequence (a time-major list of tensors);
    # the last list element is the output at the final time step
    outputs, _ = tf.nn.rnn(cell, data, dtype=DATA_TYPE)
    last = outputs[-1]
    prediction = tf.nn.softmax(
        tf.matmul(last, weights['out']) + biases['out'])
    # cross_entropy = -tf.reduce_sum(train_labels_node * tf.log(prediction))
    return prediction

def prediction(self):
    # Recurrent network: build a fresh GRUCell + DropoutWrapper per layer,
    # since repeating one wrapped cell with [network] * num_layers would
    # share weights across layers.
    cells = []
    for _ in range(self._num_layers):
        cell = GRUCell(self._num_hidden)
        cell = DropoutWrapper(cell, output_keep_prob=self.dropout)
        cells.append(cell)
    network = MultiRNNCell(cells)
    output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
    # Select last output.
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    # Softmax layer.
    weight, bias = self._weight_and_bias(self._num_hidden,
                                         int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction

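# A variant sketch (not part of the snippet above): the transpose/gather
# idiom reads the final time step, which is only right for fixed-length
# batches. With padded variable-length inputs, gather each sequence's own
# last valid output from the batch-major dynamic_rnn result instead;
# `length` is an assumed [batch] int32 tensor of true sequence lengths.
output, _ = tf.nn.dynamic_rnn(network, self.data,
                              sequence_length=length, dtype=tf.float32)
indices = tf.stack([tf.range(tf.shape(output)[0]), length - 1], axis=1)
last = tf.gather_nd(output, indices)   # [batch, num_hidden]
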
def build_cell(self, hidden_units, depth=1):
    """Create a stacked RNNCell network.

    Args:
        hidden_units: Number of units in each RNNCell.
        depth: The number of RNNCell layers.

    Returns:
        An instance of RNNCell.
    """
    cell_lists = [
        self.build_single_cell(hidden_units) for i in range(depth)
    ]
    return MultiRNNCell(cell_lists)

def _create_decoder_cell(self):
    enc_outputs, enc_states, enc_seq_len = (self.enc_outputs,
                                            self.enc_states,
                                            self.enc_seq_len)
    batch_size = self.batch_size * self.cfg.beam_size \
        if self.use_beam_search else self.batch_size

    with tf.variable_scope("attention"):
        if self.cfg.attention == "luong":  # Luong attention mechanism
            attention_mechanism = LuongAttention(
                num_units=self.cfg.num_units,
                memory=enc_outputs,
                memory_sequence_length=enc_seq_len)
        else:  # default: Bahdanau attention mechanism
            attention_mechanism = BahdanauAttention(
                num_units=self.cfg.num_units,
                memory=enc_outputs,
                memory_sequence_length=enc_seq_len)

    def cell_input_fn(inputs, attention):
        # cell input function to keep input/output dimensions the same;
        # reference:
        # https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/AttentionWrapper
        if not self.cfg.use_attention_input_feeding:
            return inputs
        input_project = tf.layers.Dense(self.cfg.num_units,
                                        dtype=tf.float32,
                                        name='attn_input_feeding')
        return input_project(tf.concat([inputs, attention], axis=-1))

    if self.cfg.top_attention:
        # apply the attention mechanism only on the top decoder layer
        cells = [self._create_rnn_cell() for _ in range(self.cfg.num_layers)]
        cells[-1] = AttentionWrapper(cells[-1],
                                     attention_mechanism=attention_mechanism,
                                     name="Attention_Wrapper",
                                     attention_layer_size=self.cfg.num_units,
                                     initial_cell_state=enc_states[-1],
                                     cell_input_fn=cell_input_fn)
        initial_state = [state for state in enc_states]
        initial_state[-1] = cells[-1].zero_state(batch_size=batch_size,
                                                 dtype=tf.float32)
        dec_init_states = tuple(initial_state)
        cells = MultiRNNCell(cells)
    else:
        cells = MultiRNNCell(
            [self._create_rnn_cell() for _ in range(self.cfg.num_layers)])
        cells = AttentionWrapper(cells,
                                 attention_mechanism=attention_mechanism,
                                 name="Attention_Wrapper",
                                 attention_layer_size=self.cfg.num_units,
                                 initial_cell_state=enc_states,
                                 cell_input_fn=cell_input_fn)
        dec_init_states = cells.zero_state(
            batch_size=batch_size,
            dtype=tf.float32).clone(cell_state=enc_states)
    return cells, dec_init_states

def build_decoder_cell(self):
    encoder_outputs = self.encoder_outputs
    encoder_last_state = self.encoder_last_state
    encoder_inputs_length = self.encoder_inputs_length

    # building attention mechanism: default is Bahdanau
    # 'Bahdanau': https://arxiv.org/abs/1409.0473
    self.attention_mechanism = attention_wrapper.BahdanauAttention(
        num_units=self.hidden_size,
        memory=encoder_outputs,
        memory_sequence_length=encoder_inputs_length)
    # 'Luong': https://arxiv.org/abs/1508.04025
    if self.attention_type.lower() == 'luong':
        self.attention_mechanism = attention_wrapper.LuongAttention(
            num_units=self.hidden_size,
            memory=self.encoder_outputs,
            memory_sequence_length=self.encoder_inputs_length)

    # building decoder_cell
    self.decoder_cell_list = [
        self.build_single_cell() for _ in range(self.layer_num)
    ]

    def att_decoder_input_fn(inputs, attention):
        if not self.use_att_decoding:
            return inputs
        _input_layer = Dense(self.hidden_size, dtype=self.dtype,
                             name='att_input_feeding')
        return _input_layer(array_ops.concat([inputs, attention], axis=-1))

    # AttentionWrapper wraps an RNNCell with the attention_mechanism;
    # the attention mechanism is applied only on the top decoder layer
    self.decoder_cell_list[-1] = attention_wrapper.AttentionWrapper(
        cell=self.decoder_cell_list[-1],
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.hidden_size,
        cell_input_fn=att_decoder_input_fn,
        # last hidden state of the last encoder layer
        initial_cell_state=encoder_last_state[-1],
        alignment_history=False,
        name='Attention_Wrapper')

    initial_state = [state for state in encoder_last_state]
    initial_state[-1] = self.decoder_cell_list[-1].zero_state(
        batch_size=self.batch_size, dtype=self.dtype)
    decoder_initial_state = tuple(initial_state)

    return MultiRNNCell(self.decoder_cell_list), decoder_initial_state

def gru_net_initial(self,
                    hidden_units,
                    input_data,
                    initial_state,
                    input_length,
                    depth=1):
    cell_lists = [
        self.build_single_cell(hidden_units) for i in range(depth)
    ]
    # state_is_tuple=False concatenates the per-layer states, so the flat
    # initial_state tensor can be fed in directly
    multi_cell = MultiRNNCell(cell_lists, state_is_tuple=False)
    input_length = tf.reshape(input_length, [-1])
    output, state = tf.nn.dynamic_rnn(multi_cell,
                                      input_data,
                                      sequence_length=input_length,
                                      initial_state=initial_state,
                                      dtype=tf.float32)
    return output

def create_rnn_cell(cell_type,
                    num_units,
                    num_layers=1,
                    dp_input_keep_prob=1.0,
                    dp_output_keep_prob=1.0,
                    residual_connections=False):
    """
    TODO: MOVE THIS properly to utils.

    :param cell_type: 'lstm' or 'gru'
    :param num_units: number of units per layer
    :param num_layers: number of stacked layers
    :param dp_input_keep_prob: input keep probability for dropout
    :param dp_output_keep_prob: output keep probability for dropout
    :param residual_connections: whether to wrap each cell in a ResidualWrapper
    :return: an RNNCell (a MultiRNNCell if num_layers > 1)
    """

    def single_cell(num_units):
        if cell_type == "lstm":
            cell_class = LSTMCell
        elif cell_type == "gru":
            cell_class = GRUCell
        else:
            # guard against an unbound cell_class for unknown types
            raise ValueError('Cell Type Not Supported!')

        if residual_connections:
            if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
                return DropoutWrapper(
                    ResidualWrapper(cell_class(num_units=num_units)),
                    input_keep_prob=dp_input_keep_prob,
                    output_keep_prob=dp_output_keep_prob)
            else:
                return ResidualWrapper(cell_class(num_units=num_units))
        else:
            if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
                return DropoutWrapper(cell_class(num_units=num_units),
                                      input_keep_prob=dp_input_keep_prob,
                                      output_keep_prob=dp_output_keep_prob)
            else:
                return cell_class(num_units=num_units)

    if num_layers > 1:
        return MultiRNNCell(
            [single_cell(num_units) for _ in range(num_layers)])
    else:
        return single_cell(num_units)

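# A hypothetical usage sketch for the residual variant above (the layer
# count and sizes are assumptions): residual connections require matching
# input/output sizes, which holds here because every layer emits num_units
# features into a layer expecting num_units features; note the input
# feature size must also equal num_units, since the first layer is wrapped
# in a ResidualWrapper as well.
cell = create_rnn_cell('lstm', num_units=256, num_layers=4,
                       dp_output_keep_prob=0.9, residual_connections=True)
x = tf.placeholder(tf.float32, [None, 100, 256])  # feature size must match
y, s = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
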
class LSTMOptModel(nn_opt.BasicNNOptModel):

    def lstm_cell(self):
        return LSTMCell(num_units=self.dimH)

    def _build_pre(self, size):
        self.dimH = size
        self.num_of_layers = 2
        self.cellH = MultiRNNCell(
            [self.lstm_cell() for _ in range(self.num_of_layers)])
        self.lr = 0.1

    def _build_input(self):
        self.x = self.ph([None])
        self.cellH_state = tuple(
            (self.ph([None, size.c]), self.ph([None, size.h]))
            for size in self.cellH.state_size)
        self.input_state = [self.x, self.cellH_state]

    def _build_initial(self):
        x = self.x  # weights of the optimizee
        cellH_state = self.cellH.zero_state(tf.size(x), tf.float32)
        self.initial_state = [x, cellH_state]

    # return state, fx
    def _iter(self, f, i, state):
        x, cellH_state = state

        fx, grad = self._get_fx(f, i, x)
        self.optimizee_grad.append(grad)
        grad = tf.stop_gradient(grad)

        last = self._deepmind_log_encode(grad)
        with tf.variable_scope("cellH"):
            last, cellH_state = self.cellH(last, cellH_state)
        with tf.variable_scope("fc"):
            last = self.fc(last, 1)

        delta_x = last[:, 0] * self.lr
        x += delta_x

        return [x, cellH_state], fx

def build_graph(self):
    with tf.variable_scope('lstm'):
        # one LSTMCell per layer; [lstm_cell] * self.layers would reuse a
        # single instance and share weights across layers
        rnn_cell = MultiRNNCell(
            [LSTMCell(self.layer_size) for _ in range(self.layers)])
        cell_output, self.init_state = rnn_cell(self.model_input,
                                                self.init_state)
        print("%i layers created" % self.layers)

        self.output_layer = self.__add_output_layer("fc_out", cell_output,
                                                    self.layer_size,
                                                    self.output_dim)

        self.output_layer = tf.Print(
            self.output_layer,
            [self.output_layer, tf.convert_to_tensor(self.ground_truth)],
            'Value of output layer and ground truth:',
            summarize=6)

        tf.histogram_summary('lstm_output', self.output_layer)

        return self.output_layer

def _create_rnn_cell(self):
    if self.cfg["num_layers"] is None or self.cfg["num_layers"] <= 1:
        return self._create_single_rnn_cell(self.cfg["num_units"])
    else:
        if self.cfg["use_stack_rnn"]:
            # return a plain list of cells; the caller presumably stacks
            # them itself (e.g. with a stacked-bidirectional RNN helper)
            lstm_cells = []
            for i in range(self.cfg["num_layers"]):
                cell = tf.nn.rnn_cell.LSTMCell(
                    self.cfg["num_units"],
                    initializer=tf.initializers.orthogonal)
                cell = tf.contrib.rnn.DropoutWrapper(
                    cell,
                    state_keep_prob=self.keep_prob,
                    input_keep_prob=self.keep_prob,
                    dtype=tf.float32)
                lstm_cells.append(cell)
            return lstm_cells
        else:
            return MultiRNNCell([
                self._create_single_rnn_cell(self.cfg["num_units"])
                for _ in range(self.cfg["num_layers"])
            ])

def build_decoder_cell(self):
    # No beam search currently

    # Attention
    # TODO: other attention mechanisms?
    attention_mechanism = BahdanauAttention(
        num_units=self.config.hidden_units,
        memory=self.encoder_outputs,
        memory_sequence_length=self.encoder_inputs_length)

    # one LSTMCell per decoder layer; [LSTMCell(...)] * decoder_depth
    # would reuse a single instance and share weights across layers
    decoder_cells = [
        LSTMCell(self.config.hidden_units)
        for _ in range(self.config.decoder_depth)
    ]
    decoder_initial_state = list(self.encoder_last_state)

    def attn_decoder_input_fn(inputs, attention):
        if not self.config.attn_input_feeding:
            return inputs
        # Essential when use_residual=True
        _input_layer = Dense(self.config.hidden_units, dtype=tf.float32,
                             name='attn_input_feeding')
        return _input_layer(concat([inputs, attention], -1))

    # add an AttentionWrapper on the last decoder layer
    decoder_cells[-1] = AttentionWrapper(
        cell=decoder_cells[-1],
        attention_mechanism=attention_mechanism,
        attention_layer_size=self.config.hidden_units,
        cell_input_fn=attn_decoder_input_fn,
        initial_cell_state=decoder_initial_state[-1],
        alignment_history=False,
        name='Attention_Wrapper')
    decoder_initial_state[-1] = decoder_cells[-1].zero_state(
        batch_size=self.batch_size, dtype=tf.float32)
    decoder_initial_state = tuple(decoder_initial_state)

    return MultiRNNCell(decoder_cells), decoder_initial_state

def build_decoder_cell(self):
    encoder_outputs = self.encoder_outputs
    encoder_last_state = self.encoder_last_state
    encoder_inputs_length = self.encoder_inputs_length

    # To use BeamSearchDecoder, encoder_outputs, encoder_last_state and
    # encoder_inputs_length need to be tiled so that:
    # [batch_size, .., ..] -> [batch_size * beam_width, .., ..]
    if self.use_beamsearch_decode:
        print("use beamsearch decoding..")
        encoder_outputs = seq2seq.tile_batch(self.encoder_outputs,
                                             multiplier=self.beam_width)
        encoder_last_state = nest.map_structure(
            lambda s: seq2seq.tile_batch(s, self.beam_width),
            self.encoder_last_state)
        encoder_inputs_length = seq2seq.tile_batch(
            self.encoder_inputs_length, multiplier=self.beam_width)

    # Building decoder_cell
    self.decoder_cell_list = [
        self.build_single_cell() for i in range(self.depth)
    ]
    # TODO: add GPU support for distribution

    # If beamsearch decoding is used, the batch_size argument passed to
    # .zero_state is beam_width times the original batch_size
    batch_size = self.batch_size if not self.use_beamsearch_decode \
        else self.batch_size * self.beam_width
    initial_state = [state for state in encoder_last_state]
    initial_state[-1] = self.decoder_cell_list[-1].zero_state(
        batch_size=batch_size, dtype=self.dtype)
    decoder_initial_state = tuple(initial_state)

    return MultiRNNCell(self.decoder_cell_list), decoder_initial_state

class LSTMOptModel(nn_opt.BasicNNOptModel):

    def _build_pre(self):
        self.dimH = 20
        # independent LSTMCell instances per layer; [LSTMCell(self.dimH)] * 2
        # would share weights between the two layers
        self.cellH = MultiRNNCell([LSTMCell(self.dimH) for _ in range(2)])
        self.lr = 0.1

    def _build_input(self):
        self.x = self.ph([None])
        self.cellH_state = tuple(
            (self.ph([None, size.c]), self.ph([None, size.h]))
            for size in self.cellH.state_size)
        self.input_state = [self.x, self.cellH_state]

    def _build_initial(self):
        x = self.x
        cellH_state = self.cellH.zero_state(tf.size(x), tf.float32)
        self.initial_state = [x, cellH_state]

    # return state, fx
    def _iter(self, f, i, state):
        x, cellH_state = state

        fx, grad = self._get_fx(f, i, x)
        grad = tf.stop_gradient(grad)

        last = self._deepmind_log_encode(grad)
        with tf.variable_scope("cellH"):
            last, cellH_state = self.cellH(last, cellH_state)
        with tf.variable_scope("fc"):
            last = self.fc(last, 1)

        delta_x = last[:, 0] * self.lr
        x += delta_x

        return [x, cellH_state], fx

def create_cudnn_LSTM_cell(num_units,
                           input_size,
                           num_layers=1,
                           dp_input_keep_prob=1.0,
                           dp_output_keep_prob=1.0):

    def single_cell(name):
        with tf.variable_scope(name):
            if dp_input_keep_prob != 1.0 or dp_output_keep_prob != 1.0:
                return DropoutWrapper(
                    cudnn_LSTMCell(num_units=num_units,
                                   input_size=input_size,
                                   direction='unidirectional'),
                    input_keep_prob=dp_input_keep_prob,
                    output_keep_prob=dp_output_keep_prob)
            else:
                return cudnn_LSTMCell(num_units=num_units,
                                      input_size=input_size,
                                      direction='unidirectional')

    if num_layers > 1:
        return MultiRNNCell(
            [single_cell('layer_%d' % i) for i in range(num_layers)])
    else:
        return single_cell('layer_0')

def build_encoder_cell(self):
    return MultiRNNCell(
        [self.build_single_cell() for i in range(self.depth)])

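# A hypothetical end-to-end sketch (the method name and the embedded-input
# tensor are assumptions) showing how the encoder and decoder cell builders
# above are typically wired together inside a model's graph-building method:
def build_model(self):
    enc_cell = self.build_encoder_cell()
    self.encoder_outputs, self.encoder_last_state = tf.nn.dynamic_rnn(
        enc_cell,
        self.encoder_embedded_inputs,
        sequence_length=self.encoder_inputs_length,
        dtype=self.dtype)
    # the decoder builder consumes encoder_outputs / encoder_last_state
    decoder_cell, decoder_initial_state = self.build_decoder_cell()
    return decoder_cell, decoder_initial_state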