def _apply_droput_wrapper(self):
    """Stack dropout-wrapped cells and derive the model's state tensors.

    Reads ``self.num_layers``, ``self.in_keep_prob``, ``self.out_keep_prob``
    and ``self.batch_size``; sets ``self.multi_cell``, ``self.initial_state``
    and ``self.zero_state``.
    """
    # One freshly built cell per layer, each wrapped with input/output dropout.
    wrapped_layers = [
        DropoutWrapper(
            self.__new_cell(),
            input_keep_prob=self.in_keep_prob,
            output_keep_prob=self.out_keep_prob,
        )
        for _ in range(self.num_layers)
    ]
    self.multi_cell = MultiRNNCell(wrapped_layers)

    # zero_state is requested twice: the first result is handed to
    # rnn_placeholders, the second is kept as-is on the instance.
    placeholder_seed = self.multi_cell.zero_state(self.batch_size, tf.float32)
    self.initial_state = rnn_placeholders(placeholder_seed)
    self.zero_state = self.multi_cell.zero_state(self.batch_size, tf.float32)
def Stack_LSTM(inputs, lengths, is_training=False):
    """Run ``inputs`` through a stack of zoneout LSTM cells.

    The layer count, cell size, zoneout rate and residual flag are read from
    ``hp.Speaker_Embedding.LSTM``; the projection size comes from
    ``hp.Speaker_Embedding.Embedding_Size``.

    Args:
        inputs: tensor passed to ``tf.nn.dynamic_rnn`` as ``inputs``.
        lengths: passed to ``tf.nn.dynamic_rnn`` as ``sequence_length``.
        is_training: forwarded to every ``ZoneoutLSTMCell``.

    Returns:
        The output tensor of ``tf.nn.dynamic_rnn`` (the final state is dropped).
    """
    lstm_hp = hp.Speaker_Embedding.LSTM
    embedding_size = hp.Speaker_Embedding.Embedding_Size
    layer_count = lstm_hp.Nums

    stacked = []
    for index in range(layer_count):
        # Project only when the cell size differs from the embedding size.
        if lstm_hp.Cell_Size == embedding_size:
            projection = None
        else:
            projection = embedding_size

        layer = ZoneoutLSTMCell(
            num_units=lstm_hp.Cell_Size,
            num_proj=projection,
            activation=tf.tanh,
            is_training=is_training,
            cell_zoneout_rate=lstm_hp.Zoneout_Rate,
            output_zoneout_rate=lstm_hp.Zoneout_Rate,
            name='lstmcell_{}'.format(index),
        )

        # Every layer except the last one may get a residual connection.
        is_last_layer = index >= layer_count - 1
        if lstm_hp.Use_Residual and not is_last_layer:
            layer = ResidualWrapper(layer)
        stacked.append(layer)

    with tf.variable_scope('lstm'):
        rnn_outputs, _ = tf.nn.dynamic_rnn(
            cell=MultiRNNCell(stacked),
            inputs=inputs,
            sequence_length=lengths,
            dtype=tf.float32,
        )
    return rnn_outputs
def build_encoder_cell(self):
    """Build the stand-alone encoder cell.

    ``self.depth`` decides how many single cells are stacked; each one is
    created by ``self.build_single_cell`` with ``self.hidden_units`` and
    ``self.use_residual``.

    Returns:
        The ``MultiRNNCell`` wrapping all layers.
    """
    layers = []
    for _ in range(self.depth):
        layers.append(
            self.build_single_cell(self.hidden_units,
                                   use_residual=self.use_residual))
    encoder_cell = MultiRNNCell(layers)

    # Debug output: confirm the stacked cell exposes the RNN cell attributes.
    print("in build_encoder_cell")
    print(hasattr(encoder_cell, 'output_size'))
    print(hasattr(encoder_cell, 'state_size'))
    return encoder_cell
def build_encoder_cell(self):
    """Build the stand-alone encoder cell.

    The number of stacked layers is ``self.depth``. Every layer comes from
    ``self.build_single_cell(self.hidden_units, use_residual=...)`` and the
    layers are composed sequentially with ``MultiRNNCell``.

    Returns:
        The stacked ``MultiRNNCell``.
    """
    single_cells = [
        self.build_single_cell(self.hidden_units,
                               use_residual=self.use_residual)
        for _ in range(self.depth)
    ]
    multi_cell = MultiRNNCell(single_cells)

    # Debug output: report whether the stacked cell exposes each attribute.
    print("in build_encoder_cell")
    for attribute in ('output_size', 'state_size'):
        print(hasattr(multi_cell, attribute))
    return multi_cell
def build_model(self):
    """Build the LSTM classifier graph, train it, and log the test accuracy.

    The recurrent part depends on ``self.mode``:

    * ``"basic-lstm"``: one ``BasicLSTMCell`` of ``self.lstm_output_size``.
    * ``"bi-lstm"``: forward and backward ``BasicLSTMCell``; their outputs
      are concatenated on the feature axis.
    * ``"two-lstm"``: two stacked ``BasicLSTMCell`` layers of 300 and 150
      units.

    Reads ``self.maxlen``, ``self.num_classes``, ``self.emb_matrix``,
    ``self.x_train``/``self.y_train`` and ``self.x_test``/``self.y_test``.
    Summaries are written to the ``tmp`` directory.

    Raises:
        ValueError: if ``self.mode`` is not one of the supported modes.
    """
    batch_size = 100
    num_epochs = 10
    summary_every = 10000  # write summaries every this many samples

    supported_modes = ("basic-lstm", "bi-lstm", "two-lstm")
    if self.mode not in supported_modes:
        # Fail early instead of hitting an unbound `outputs`/`w` later on.
        raise ValueError("Unsupported mode {!r}; expected one of {}".format(
            self.mode, supported_modes))

    logging.info("Building model...")
    X = tf.placeholder("int32", shape=[None, self.maxlen], name="x")
    y_ = tf.placeholder(tf.float64, shape=[None, self.num_classes],
                        name="y_true")

    with tf.name_scope("embedding"):
        embedding = tf.get_variable("embedding", dtype=tf.float64,
                                    initializer=self.emb_matrix)

    with tf.name_scope("lstm"):
        # inputs: [batch_size, maxlen, embedding_dim]
        inputs = tf.nn.embedding_lookup(embedding, X, name="inputs")
        if self.mode == "basic-lstm":
            cell = BasicLSTMCell(self.lstm_output_size, name="cell")
            outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float64)
            feature_size = self.lstm_output_size
        elif self.mode == "bi-lstm":
            cell_fw = BasicLSTMCell(self.lstm_output_size, name="cell")
            cell_bw = BasicLSTMCell(self.lstm_output_size, name="cell")
            # The dynamic variant accepts the 3-D float64 tensor directly;
            # the static variant needs a per-step list and was given a
            # mismatching float32 dtype.
            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, inputs, dtype=tf.float64)
            outputs = tf.concat((output_fw, output_bw), 2)
            feature_size = 2 * self.lstm_output_size
        else:  # "two-lstm"
            layer_sizes = [300, 150]
            stacked_rnn_cell = MultiRNNCell(
                [BasicLSTMCell(n) for n in layer_sizes])
            outputs, _ = tf.nn.dynamic_rnn(stacked_rnn_cell, inputs,
                                           dtype=tf.float64)
            feature_size = layer_sizes[-1]

    with tf.name_scope("fc"):
        # The weight width follows the recurrent output width of the mode.
        w = tf.get_variable("w", shape=[feature_size, self.num_classes],
                            dtype=tf.float64)
        b = tf.get_variable("b", shape=[self.num_classes], dtype=tf.float64)
        # Classify from the output at the last time step.
        act = tf.matmul(outputs[:, -1, :], w) + b
        y = tf.nn.softmax(act)
        tf.summary.histogram("w", w)
        tf.summary.histogram("b", b)

    with tf.name_scope("train"):
        cross_entropy = tf.reduce_mean(
            -tf.reduce_sum(y_ * tf.log(y), axis=[1]))
        # Define train step.
        train_step = tf.train.AdagradOptimizer(0.1).minimize(cross_entropy)
        # Define accuracy.
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))
        tf.summary.scalar("cross_entropy", cross_entropy)
        tf.summary.scalar("accuracy", accuracy)

    summ_acc = tf.summary.merge_all(scope="train")
    summ_fc = tf.summary.merge_all(scope="fc")
    test_feed = {X: self.x_test, y_: self.y_test}

    # The context manager and the finally clause release the session and
    # flush the event file even when training fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter("tmp")
        try:
            writer.add_graph(sess.graph)

            logging.info("Training model...")
            data_size = self.x_train.shape[0]
            s = time.time()
            for i in range(num_epochs):
                print("epoch {}".format(i))
                # `+ 1` keeps the final batch when data_size is an exact
                # multiple of batch_size; partial batches are still skipped.
                for start in range(0, data_size - batch_size + 1, batch_size):
                    stop = start + batch_size
                    batch_feed = {X: self.x_train[start:stop],
                                  y_: self.y_train[start:stop]}
                    sess.run(train_step, feed_dict=batch_feed)
                    if stop % summary_every == 0:
                        step = stop + i * data_size
                        writer.add_summary(
                            sess.run(summ_fc, feed_dict=batch_feed),
                            global_step=step)
                        writer.add_summary(
                            sess.run(summ_acc, feed_dict=test_feed),
                            global_step=step)

            acc = sess.run(accuracy, feed_dict=test_feed)
            logging.info("Accuracy: {}".format(acc))
            t = time.time()
            logging.info("Train model use {}s".format(t - s))
        finally:
            writer.close()
class CharToChar():
    """Character-to-character RNN model built as a TensorFlow 1.x graph.

    Constructing an instance resets the default graph and builds the whole
    model: placeholders, a stack of dropout-wrapped cells, a single-step
    static unroll, a dense softmax layer and the optimizer update op.
    """

    def __init__(self, name, units, train_batch, hot_dimen, num_layers=3,
                 cell_type='lstm', in_keep_prob_val=0.7,
                 out_keep_prob_val=0.7, learning_rate=1e-2, optimizer='adam',
                 use_grad_clip=False, grad_clip_val=5.0):
        """Store the configuration and build the graph.

        Args:
            name: model name; used as checkpoint and JSON file name.
            units: number of units of every recurrent cell.
            train_batch: value fed to the batch-size placeholder in training.
            hot_dimen: width of the input/output placeholders.
            num_layers: number of stacked recurrent cells.
            cell_type: ``'lstm'`` or ``'rnn'``; anything else selects a GRU.
            in_keep_prob_val: input keep probability fed during training.
            out_keep_prob_val: output keep probability fed during training.
            learning_rate: learning rate handed to the optimizer.
            optimizer: ``'adam'`` or ``'rms'`` (case-insensitive); anything
                else selects Adam.
            use_grad_clip: clip gradients by value when true.
            grad_clip_val: gradients are clipped to ``[-val, val]``.
        """
        self.name = name
        self.units = units
        self.train_batch = train_batch
        self.num_layers = num_layers
        self.in_keep_prob_val = in_keep_prob_val
        self.out_keep_prob_val = out_keep_prob_val
        self.cell_type = cell_type
        self.hot_dimen = hot_dimen
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.grad_clip_val = grad_clip_val
        self.use_grad_clip = use_grad_clip

        # Graph handles, filled in by the builder methods below.
        self.multi_cell = None
        self.initial_state = None
        self.input_placeholder = None
        self.output_placeholder = None
        self.outputs_raw = None
        self.logits = None
        self.predictions = None
        self.entropy_loss = None
        self.grad_update = None
        self.final_state = None
        self.sess = None
        self.init = None
        self.zero_state = None
        self.in_keep_prob = None
        self.out_keep_prob = None
        self.is_training_done = False

        tf.reset_default_graph()
        self._build_placeholders()
        self._apply_droput_wrapper()
        self._static_unroll()
        self._reshape_and_unstack()
        self._build_optimizer_and_finalize_graph()

    def __new_cell(self):
        """Return a new recurrent cell of the configured type."""
        if self.cell_type == 'lstm':
            return BasicLSTMCell(self.units)
        elif self.cell_type == 'rnn':
            return BasicRNNCell(self.units)
        else:
            return GRUCell(self.units)

    def _apply_droput_wrapper(self):
        """Stack dropout-wrapped cells and create the state tensors."""
        cells = [
            DropoutWrapper(self.__new_cell(),
                           input_keep_prob=self.in_keep_prob,
                           output_keep_prob=self.out_keep_prob)
            for _ in range(self.num_layers)
        ]
        self.multi_cell = MultiRNNCell(cells)
        self.initial_state = rnn_placeholders(
            self.multi_cell.zero_state(self.batch_size, tf.float32))
        self.zero_state = self.multi_cell.zero_state(self.batch_size,
                                                     tf.float32)

    def _build_placeholders(self):
        """Create the batch-size, input, target and keep-prob placeholders."""
        self.batch_size = tf.placeholder(tf.int32, [])
        self.input_placeholder = tf.placeholder(
            tf.float32, shape=[None, self.hot_dimen])
        self.output_placeholder = tf.placeholder(
            tf.float32, shape=[None, self.hot_dimen])
        self.in_keep_prob = tf.placeholder(tf.float32, [])
        self.out_keep_prob = tf.placeholder(tf.float32, [])

    def _static_unroll(self):
        """Unroll the stacked cell for exactly one time step."""
        self.outputs_raw, self.final_state = tf.nn.static_rnn(
            cell=self.multi_cell,
            inputs=[self.input_placeholder],
            dtype=tf.float32,
            initial_state=self.initial_state)
        # static_rnn returns one output per step; there is only one step.
        self.outputs_raw = self.outputs_raw[0]

    def get_shared_variable(self, var, shape=None):
        """Return variable ``var`` from the ``softmax_dense`` scope."""
        with tf.variable_scope('softmax_dense', reuse=tf.AUTO_REUSE):
            v = tf.get_variable(var, shape)
        return v

    def __apply_dense(self, time_step):
        """Apply the shared dense layer ``time_step @ W + B``."""
        w = self.get_shared_variable('W', shape=[self.units, self.hot_dimen])
        b = self.get_shared_variable('B', shape=[self.hot_dimen])
        return tf.matmul(time_step, w) + b

    def _reshape_and_unstack(self):
        """Create logits, softmax predictions and the cross-entropy loss."""
        self.logits = self.__apply_dense(self.outputs_raw)
        self.predictions = tf.nn.softmax(self.logits)
        self.entropy_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.output_placeholder, logits=self.logits))

    def _build_optimizer_and_finalize_graph(self):
        """Create ``self.grad_update`` for the configured optimizer."""
        optimizer_classes = {
            'adam': tf.train.AdamOptimizer,
            'rms': tf.train.RMSPropOptimizer,
        }
        opt_cls = optimizer_classes.get(self.optimizer.lower(),
                                        tf.train.AdamOptimizer)
        # Instantiate once so both branches honour the learning rate; the
        # clipping branch used to call instance methods on the class itself.
        opt = opt_cls(self.learning_rate)

        if self.use_grad_clip:
            grad_vars = opt.compute_gradients(self.entropy_loss)
            grad_clip_const = tf.constant(self.grad_clip_val,
                                          name='grad_clipper')
            # Variables that do not influence the loss get a None gradient,
            # which clip_by_value cannot handle.
            clipped_grad_var = [
                (tf.clip_by_value(grad, -grad_clip_const, grad_clip_const),
                 var)
                for grad, var in grad_vars if grad is not None
            ]
            self.grad_update = opt.apply_gradients(clipped_grad_var)
        else:
            self.grad_update = opt.minimize(loss=self.entropy_loss)

    def start_session(self):
        """Create a new session, store it on the instance and return it."""
        self.sess = tf.Session()
        return self.sess

    def train(self, file_pipe, session=None, print_loss_after_iterations=50):
        """Train on the batches produced by ``file_pipe`` until it is done.

        Args:
            file_pipe: a ``FilePipeline`` yielding ``(data, done)`` from
                ``next_batch()``; ``data[0]`` is fed as input and
                ``data[1]`` as target.
            session: session to use; a new one is created when omitted.
            print_loss_after_iterations: kept for backward compatibility;
                currently unused.

        Raises:
            ValueError: if ``file_pipe`` is not a ``FilePipeline``.
        """
        # Validate before creating a session so bad input leaks nothing.
        if not isinstance(file_pipe, FilePipeline):
            raise ValueError(
                "Cannot train the model. file_pipe is not an instance of FilePipeline"
            )

        self.sess = session or tf.Session()
        self.init = tf.global_variables_initializer()
        self.is_training_done = True
        assert self.hot_dimen == file_pipe.get_distinct_char_count()
        self.sess.run(self.init)

        train_feed = {
            self.batch_size: self.train_batch,
            self.in_keep_prob: self.in_keep_prob_val,
            self.out_keep_prob: self.out_keep_prob_val
        }
        state = self.sess.run(self.zero_state, feed_dict=train_feed)

        p_bar = tqdm(total=file_pipe.get_expected_total_iteration())
        p_bar.update(0)
        p_bar.set_description("Iteration")
        try:
            all_epoch_done = False
            while not all_epoch_done:
                data, all_epoch_done = file_pipe.next_batch()
                feeder = dict(train_feed)
                feeder[self.input_placeholder] = data[0]
                feeder[self.output_placeholder] = data[1]
                # Carry the recurrent state over from the previous batch.
                feeder[self.initial_state] = state
                state, _ = self.sess.run(
                    [self.final_state, self.grad_update], feed_dict=feeder)
                p_bar.update(1)
        finally:
            p_bar.close()

    def recycle(self):
        """Close the session, if one was created."""
        if self.sess is not None:
            self.sess.close()

    def sample(self, f_pipe, seq_len=5, save_as_file=None):
        """Sample ``seq_len`` symbols and hand them to ``preditions_to_string``.

        Raises:
            ValueError: if the model was neither trained nor restored.
        """
        if not self.is_training_done:
            raise ValueError(
                "You must train the model before sampling sequences.")

        no_dropout_feed = {
            self.batch_size: 1,
            self.in_keep_prob: 1.0,
            self.out_keep_prob: 1.0
        }
        # Start from the zero state, as train() does; evaluating the
        # placeholder structure itself only works if it carries defaults.
        state = self.sess.run(self.zero_state, feed_dict=no_dropout_feed)
        inp = np.zeros((1, self.hot_dimen))
        result = []
        for _ in range(seq_len):
            feeder = dict(no_dropout_feed)
            feeder[self.input_placeholder] = inp
            feeder[self.initial_state] = state
            probabilities, state = self.sess.run(
                [self.predictions, self.final_state], feed_dict=feeder)
            # pylint:disable=E1101
            x = np.random.choice(self.hot_dimen, p=np.squeeze(probabilities))
            # Feed the sampled symbol back in as a one-hot vector.
            inp = np.zeros((1, self.hot_dimen))
            inp[0, x] = 1
            result.append(x)
        preditions_to_string(f_pipe, result, save_as_file)

    def _checkpoint_path(self, version, folder=None):
        """Return the checkpoint prefix for ``version`` (and ``folder``)."""
        if folder is None:
            return './saved-v' + str(version) + '/' + self.name
        return './' + folder + str(version) + '/' + self.name

    def load_saved_checkpoints(self, version, folder=None):
        """Restore the variables saved under ``version`` into the session."""
        saver = tf.train.Saver()
        self.is_training_done = True
        if self.sess is None:
            # Restoring needs a live session; create one on demand.
            self.start_session()
        saver.restore(self.sess, self._checkpoint_path(version, folder))

    def dump_model_checkpoints(self, version, folder=None):
        """Save the session's variables under ``version``."""
        saver = tf.train.Saver()
        saver.save(self.sess, self._checkpoint_path(version, folder))

    def to_json(self, path='.', file_name=None):
        """Write the constructor arguments to ``<path>/<file_name>.json``.

        ``file_name`` defaults to the model name.
        """
        f_name = file_name or self.name
        config = [
            "name", "units", "train_batch", "hot_dimen", "num_layers",
            "cell_type", "in_keep_prob_val", "out_keep_prob_val",
            "learning_rate", "optimizer", "use_grad_clip", "grad_clip_val"
        ]
        data = {k: v for k, v in self.__dict__.items() if k in config}
        with open(os.path.join(path, f_name) + '.json', mode='w') as f:
            json.dump(data, f)

    @classmethod
    def from_json(cls, path, file_name):
        """Build a model from a JSON file written by ``to_json``.

        ``file_name`` may be given with or without the ``.json`` suffix that
        ``to_json`` appends. Subclasses get an instance of the subclass.
        """
        full_path = os.path.join(path, file_name)
        if not os.path.isfile(full_path) and os.path.isfile(
                full_path + '.json'):
            full_path += '.json'
        with open(full_path) as f:
            data = json.load(f)
        return cls(**data)
def __init__(self, batch_size, inputs, outputs, num_units, cell_type):
    """Build an LSTM that reads ``inputs`` and then predicts ``len(outputs)`` steps.

    After consuming every input step, the cell is fed its own linear
    read-out (``output @ Wy + by``) once per entry of ``outputs``.

    Args:
        batch_size: batch dimension used for the zero initial states.
        inputs: list of per-step input tensors; dimension 1 of ``inputs[0]``
            defines both the input and the read-out width.
        outputs: list of per-step target tensors; its length is the number
            of predicted steps.
        num_units: list of hidden sizes, one LSTM layer per entry.
        cell_type: accepted for interface compatibility; not used here.
    """
    self.batch_size = batch_size
    self.num_inputs = inputs[0].get_shape().as_list()[1]
    self.num_outputs = self.num_inputs
    self.last = inputs[-1]

    is_stacked = len(num_units) > 1
    top_units = num_units[-1]
    if is_stacked:
        self._lstm_cell = MultiRNNCell(
            [LSTMCell(num_units=n) for n in num_units])
    else:
        self._lstm_cell = LSTMCell(top_units)

    with tf.compat.v1.variable_scope('encoder') as ec:
        Wy = tf.Variable(
            tf.random.truncated_normal([top_units, self.num_outputs],
                                       dtype=tf.float32),
            name='enc_weight')
        by = tf.Variable(
            tf.random.truncated_normal([self.num_outputs], dtype=tf.float32),
            name='enc_bias')

        # Zero cell/hidden state per layer; both entries of each tuple are
        # the same zeros tensor.
        layer_states = []
        for units in num_units:
            zeros = tf.zeros((batch_size, units))
            layer_states.append(tf.contrib.rnn.LSTMStateTuple(zeros, zeros))
        layer_states = tuple(layer_states)
        lstm_state = layer_states if is_stacked else layer_states[0]

        # Phase 1: consume the observed sequence.
        for step, lstm_input in enumerate(inputs):
            if step > 0:
                ec.reuse_variables()
            lstm_output, lstm_state = self._lstm_cell(lstm_input, lstm_state)

        # Phase 2: feed each read-out back in as the next input.
        lstm_outputs = []
        for _ in outputs:
            lstm_input = tf.matmul(lstm_output, Wy) + by
            lstm_outputs.append(lstm_input)
            lstm_output, lstm_state = self._lstm_cell(lstm_input, lstm_state)

    # NOTE(review): the flattened source does not show whether the ops below
    # sat inside the 'encoder' scope; confirm if tensor names matter.
    # Stack along time, then move batch to the front.
    batch_major = [1, 0, 2]
    self.prediction = tf.transpose(tf.stack(lstm_outputs), batch_major,
                                   name='prediction')
    self.target = tf.transpose(tf.stack(outputs), batch_major, name='target')
    self.input_ = tf.transpose(tf.stack(inputs), batch_major)

    # Only feature 0 is exposed as prediction/target.
    self.prediction = self.prediction[:, :, 0]
    self.target = self.target[:, :, 0]
    self.enc_W = Wy
    self.enc_b = by
def __init__(self, batch_size, inputs, outputs, num_units, cell_type):
    """Build an encoder/decoder RNN that predicts ``len(outputs)`` steps.

    The encoder consumes ``inputs`` step by step. The decoder starts from
    the final encoder state and the last encoder read-out, and is fed its
    own linear read-out at every following step.

    Args:
        batch_size: batch dimension used for the zero initial states.
        inputs: list of per-step input tensors; dimension 1 of ``inputs[0]``
            defines both the input and the read-out width.
        outputs: list of per-step target tensors; its length is the number
            of decoded steps.
        num_units: list of hidden sizes, one recurrent layer per entry.
        cell_type: ``'GRU'`` selects GRU cells; anything else selects LSTM.
    """
    self.batch_size = batch_size
    self.num_inputs = inputs[0].get_shape().as_list()[1]
    self.num_outputs = self.num_inputs

    num_hidden = num_units[-1]
    use_gru = cell_type == 'GRU'
    is_stacked = len(num_units) > 1
    cell_cls = GRUCell if use_gru else LSTMCell

    def new_cell():
        """Create a fresh (possibly stacked) cell."""
        if is_stacked:
            return MultiRNNCell([cell_cls(num_units=n) for n in num_units])
        return cell_cls(num_hidden)

    # Encoder and decoder each get their own cell objects. The stacked case
    # used to wrap one shared list, unlike the single-layer case.
    self._enc_cell = new_cell()
    self._dec_cell = new_cell()

    with tf.compat.v1.variable_scope('encoder') as es:
        enc_W = tf.Variable(
            tf.random.truncated_normal([num_hidden, self.num_outputs],
                                       dtype=tf.float32),
            name='enc_weight')
        enc_b = tf.Variable(
            tf.random.truncated_normal([self.num_outputs], dtype=tf.float32),
            name='enc_bias')

        if use_gru:
            init_states = [tf.zeros((batch_size, n)) for n in num_units]
        else:
            # LSTM cells expect a (cell state, hidden state) tuple.
            init_states = []
            for n in num_units:
                init_c = tf.zeros((batch_size, n))
                init_states.append(
                    tf.contrib.rnn.LSTMStateTuple(init_c, init_c))
        init_states = tuple(init_states)
        enc_state = init_states if is_stacked else init_states[0]

        for step, enc_input in enumerate(inputs):
            if step > 0:
                es.reuse_variables()
            enc_output, enc_state = self._enc_cell(enc_input, enc_state)
            # y_hat = W * h + b; the last one seeds the decoder.
            enc_prediction = tf.matmul(enc_output, enc_W) + enc_b

    with tf.compat.v1.variable_scope('decoder') as vs:
        dec_W = tf.Variable(
            tf.random.truncated_normal([num_hidden, self.num_outputs],
                                       dtype=tf.float32),
            name='dec_weight')
        dec_b = tf.Variable(
            tf.random.truncated_normal([self.num_outputs], dtype=tf.float32),
            name='dec_bias')

        dec_input = enc_prediction
        dec_state = enc_state
        dec_outputs = []
        for step in range(len(outputs)):
            if step > 0:
                vs.reuse_variables()
            dec_hidden, dec_state = self._dec_cell(dec_input, dec_state)
            # The read-out is both this step's prediction and the next input.
            dec_input = tf.matmul(dec_hidden, dec_W) + dec_b
            dec_outputs.append(dec_input)

    # NOTE(review): the flattened source does not show whether the ops below
    # sat inside the 'decoder' scope; confirm if tensor names matter.
    self.prediction = tf.transpose(
        tf.stack(dec_outputs), [1, 0, 2], name='prediction')
    self.input_ = tf.transpose(tf.stack(inputs), [1, 0, 2])
    self.target = tf.transpose(tf.stack(outputs), [1, 0, 2], name='target')

    # Only feature 0 is exposed as prediction/target.
    self.prediction = self.prediction[:, :, 0]
    self.target = self.target[:, :, 0]
    self.enc_W = enc_W
    self.enc_b = enc_b
    self.dec_W = dec_W
    self.dec_b = dec_b
def _LSTMCells(unit_list, act_fn_list):
    """Stack one ``LSTMCell`` per ``(unit, activation)`` pair.

    The two lists are paired with ``zip``, so surplus entries of the longer
    list are ignored.

    Returns:
        A ``MultiRNNCell`` over the created cells.
    """
    layers = []
    for unit, act_fn in zip(unit_list, act_fn_list):
        layers.append(LSTMCell(unit, activation=act_fn))
    return MultiRNNCell(layers)
def build_cell_layer(self):
    """Return a ``MultiRNNCell`` of ``self.depth`` independently built cells.

    ``self.build_single_cell()`` is called once per layer. Repeating a single
    cell object would make every layer refer to the same cell instead of
    owning its own.
    """
    return MultiRNNCell(
        [self.build_single_cell() for _ in range(self.depth)])
def build_decoder_cell(self, encoder_outputs, encoder_states):
    """Build the attention decoder cell and its initial state.

    Args:
        encoder_outputs: encoder outputs, used as the attention memory.
        encoder_states: encoder final states, used as the decoder's initial
            cell state.

    Returns:
        A tuple ``(cell, decoder_initial_state)``.
    """
    encoder_input_length = self.encoder_inputs_length
    batch_size = self.batch_size

    if self.bidirectional:
        # Keep only the last ``depth`` states.
        encoder_states = encoder_states[-self.depth:]

    if self.time_major:
        encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

    assert encoder_input_length is not None, 'encoder_state_length 不能为空'
    assert isinstance(batch_size, int), 'batchsize的值必须为int类型'
    assert encoder_outputs is not None, 'encoder_outputs is not None'
    assert encoder_states is not None, 'encoder_state is not None'

    # ---- beam search ----
    if self.use_beamsearch_decode:
        # tile_batch repeats each tensor beam_width times, so the effective
        # batch becomes beam_width times larger.
        encoder_outputs = seq2seq.tile_batch(
            encoder_outputs, multiplier=self.beam_width)
        encoder_states = seq2seq.tile_batch(
            encoder_states, multiplier=self.beam_width)
        encoder_input_length = seq2seq.tile_batch(
            self.encoder_inputs_length, multiplier=self.beam_width)
        # With beam search the decoder batch is batch_size * beam_width.
        batch_size *= self.beam_width

    # ---- attention mechanism ----
    if self.attention_type.lower() == 'luong':
        self.attention_mechanism = LuongAttention(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_input_length)
    else:
        self.attention_mechanism = BahdanauAttention(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_input_length)

    # The decoder itself is a multi-layer cell.
    cell = MultiRNNCell([
        self.build_single_cell(self.hidden_units,
                               use_residual=self.use_residual)
        for _ in range(self.depth)
    ])

    # Alignment history is only recorded outside training and without beam
    # search.
    alignment_history = (
        self.mode != 'train' and not self.use_beamsearch_decode)

    def cell_input_fn(inputs, attention):
        """Combine the decoder input with the attention vector.

        Without residual connections the concatenation is returned as is;
        otherwise it is projected to ``hidden_units`` by a bias-free dense
        layer.
        """
        # Build the concatenation once so debugging adds no duplicate op.
        merged = array_ops.concat([inputs, attention], -1)
        if not self.use_residual:
            # Debug output: call get_shape() so the shapes are printed
            # rather than the bound method objects.
            print(inputs.get_shape(), 'inputs_shape')
            print(attention.get_shape(), 'attention_shape')
            print(merged, 'inputs和attention拼接之后的形状')
            return merged

        attn_projection = layers.Dense(self.hidden_units,
                                       dtype=tf.float32,
                                       use_bias=False,
                                       name='attention_cell_input_fn')
        # The Dense layer object is applied by calling it.
        return attn_projection(merged)

    cell = AttentionWrapper(
        cell=cell,
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.hidden_units,
        alignment_history=alignment_history,
        cell_input_fn=cell_input_fn,
        name='Attention_Wrapper')

    # Start from the wrapper's zero state, then hand over the encoder state.
    decoder_initial_state = cell.zero_state(batch_size, tf.float32)
    decoder_initial_state = decoder_initial_state.clone(
        cell_state=encoder_states)

    return cell, decoder_initial_state
# Graph inputs: token ids, class labels, true sequence lengths and the
# dropout keep probability.
x = tf.placeholder(dtype=tf.int32, shape=[None, None])
y = tf.placeholder(dtype=tf.int64, shape=[None])
sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])
keep_prob = tf.placeholder(dtype=tf.float32)

num_units = 100
n_epoch = 100

with tf.variable_scope('embedding'):
    rnn_input = tf.contrib.layers.embed_sequence(
        x, vocab_size=embed_ingred_size, embed_dim=embed_size)

# NOTE(review): the flattened source does not show whether dynamic_rnn sat
# inside the 'rnn' scope; confirm if variable names matter.
with tf.variable_scope('rnn'):
    # Build a separate GRU cell (with its own dropout wrapper) per layer;
    # repeating one wrapped cell object would reuse a single cell for all
    # layers.
    layer_cells = [
        DropoutWrapper(GRUCell(num_units), output_keep_prob=keep_prob)
        for _ in range(num_layers)
    ]
    cell = MultiRNNCell(layer_cells)
    outputs, states = tf.nn.dynamic_rnn(cell,
                                        rnn_input,
                                        dtype=tf.float32,
                                        sequence_length=sequence_length)

# Attention:
# 'outputs' is a tensor of shape [batch_size, max_time, num_of_units].
# 'states' is an N-tuple, where N is the number of GRU cells, holding one
# state per cell.
with tf.variable_scope('full_connected'):
    # Classify from the state of the top layer.
    state = states[-1]
    fc = tf.contrib.layers.fully_connected(state,
                                           num_class,
                                           activation_fn=None)
def build_decoder_cell(self, encoder_outputs, encoder_state):
    """Build the attention decoder cell.

    Args:
        encoder_outputs: encoder outputs, used as the attention memory.
        encoder_state: encoder final state, used as the decoder's initial
            cell state.

    Returns:
        A tuple ``(cell, decoder_initial_state)``.
    """
    memory = encoder_outputs
    memory_lengths = self.encoder_inputs_length
    initial_cell_state = encoder_state
    decoder_batch = self.batch_size

    if self.bidirectional:
        initial_cell_state = initial_cell_state[-self.depth:]

    if self.time_major:
        memory = tf.transpose(memory, (1, 0, 2))

    if self.use_beamsearch_decode:
        # tile_batch repeats each tensor beam_width times, so the effective
        # batch becomes beam_width times larger.
        width = self.beam_width
        memory = seq2seq.tile_batch(memory, multiplier=width)
        initial_cell_state = seq2seq.tile_batch(initial_cell_state,
                                                multiplier=width)
        memory_lengths = seq2seq.tile_batch(memory_lengths, multiplier=width)
        # With beam search the decoder batch is batch_size * beam_width.
        decoder_batch *= width

    # Luong attention when requested, Bahdanau attention otherwise.
    # (The original author questioned whether the memory should be the
    # encoder state rather than the encoder outputs.)
    if self.attention_type.lower() == 'luong':
        attention_cls = LuongAttention
    else:
        attention_cls = BahdanauAttention
    self.attention_mechanism = attention_cls(
        num_units=self.hidden_units,
        memory=memory,
        memory_sequence_length=memory_lengths,
    )

    stacked_layers = []
    for _ in range(self.depth):
        stacked_layers.append(
            self.build_single_cell(self.hidden_units,
                                   use_residual=self.use_residual))
    cell = MultiRNNCell(stacked_layers)

    # Alignment history is only recorded outside training and without beam
    # search.
    alignment_history = (
        self.mode != 'train' and not self.use_beamsearch_decode)

    def cell_input_fn(inputs, attention):
        """Combine the decoder input with the attention vector.

        With residual connections the concatenation is projected to
        ``hidden_units`` by a bias-free dense layer; otherwise it is
        returned unchanged.
        """
        if self.use_residual:
            # A Dense layer object is applied by calling it.
            attn_projection = layers.Dense(self.hidden_units,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')
            return attn_projection(array_ops.concat([inputs, attention], -1))
        return array_ops.concat([inputs, attention], -1)

    cell = AttentionWrapper(
        cell=cell,
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.hidden_units,
        alignment_history=alignment_history,
        cell_input_fn=cell_input_fn,
        name='Attention_Wrapper',
    )

    # Start from the wrapper's zero state, then hand over the encoder state.
    zero_state = cell.zero_state(decoder_batch, tf.float32)
    decoder_initial_state = zero_state.clone(cell_state=initial_cell_state)
    return cell, decoder_initial_state
def _INDRNNCells(unit_list, time_steps):
    """Stack one ``IndRNNCell`` per entry of ``unit_list``.

    Every cell's ``recurrent_max_abs`` is set to ``2 ** (1 / time_steps)``.

    Returns:
        A ``MultiRNNCell`` (``state_is_tuple=True``) over the created cells.
    """
    recurrent_limit = pow(2, 1 / time_steps)
    layers = []
    for unit in unit_list:
        layers.append(IndRNNCell(unit, recurrent_max_abs=recurrent_limit))
    return MultiRNNCell(layers, state_is_tuple=True)
num_units = 100
n_epoch = 3000

with tf.variable_scope('embedding'):
    rnn_input = tf.contrib.layers.embed_sequence(
        x, vocab_size=embed_ingred_size, embed_dim=embed_size)

# NOTE(review): the flattened source does not show whether the
# bidirectional_dynamic_rnn call sat inside the 'rnn' scope; confirm if
# variable names matter.
with tf.variable_scope('rnn'):
    # Each direction gets its own stack: num_layers GRU cells, each wrapped
    # with output dropout.
    with tf.variable_scope('forward'):
        fw_grus = [GRUCell(num_units) for _ in range(num_layers)]
        fw_cells = MultiRNNCell([
            DropoutWrapper(gru, output_keep_prob=keep_prob)
            for gru in fw_grus
        ])

    with tf.variable_scope('Backward'):
        bw_grus = [GRUCell(num_units) for _ in range(num_layers)]
        bw_cells = MultiRNNCell([
            DropoutWrapper(gru, output_keep_prob=keep_prob)
            for gru in bw_grus
        ])

    outputs, states = bidirectional_dynamic_rnn(
        fw_cells,
        bw_cells,
        rnn_input,
        dtype=tf.float32,
        sequence_length=sequence_length,
    )