def __call__(self, x_t, hidden_t_prev, cell_t_prev):
    """Run one ``layers.lstm_unit`` step with this object's stored attributes.

    Args:
        x_t: Input for the current step, forwarded unchanged.
        hidden_t_prev: Previous hidden state, forwarded unchanged.
        cell_t_prev: Previous cell state, forwarded unchanged.

    Returns:
        Whatever ``layers.lstm_unit`` returns for these arguments.
    """
    # NOTE(review): ``forget_bias`` is neither a parameter nor read from
    # ``self``; it is resolved from a scope outside this block (enclosing
    # function or module) -- confirm it is defined there.
    unit_kwargs = {
        'x_t': x_t,
        'hidden_t_prev': hidden_t_prev,
        'cell_t_prev': cell_t_prev,
        'forget_bias': forget_bias,
        'param_attr': self.attr_holder.param_attr,
        'bias_attr': self.attr_holder.bias_attr,
    }
    return layers.lstm_unit(**unit_kwargs)
def test_lstm_unit(self):
    """Build one ``lstm_unit`` step in a fresh program and check it is not None.

    Each of the three inputs (step input, previous hidden state, previous
    cell state) is a ``layers.data`` placeholder passed through an ``fc``
    layer before being handed to ``layers.lstm_unit``.
    """
    main_program = Program()
    with program_guard(main_program):
        # Step input: data layer followed by an fc layer of size 10.
        step_input = layers.fc(
            input=layers.data(
                name='x_t_data', shape=[10, 10], dtype='float32'),
            size=10)
        # Previous hidden state: data layer followed by an fc layer of size 30.
        hidden_prev = layers.fc(
            input=layers.data(
                name='prev_hidden_data', shape=[10, 30], dtype='float32'),
            size=30)
        # Previous cell state: data layer followed by an fc layer of size 30.
        cell_prev = layers.fc(
            input=layers.data(
                name='prev_cell', shape=[10, 30], dtype='float32'),
            size=30)
        unit_result = layers.lstm_unit(
            x_t=step_input,
            hidden_t_prev=hidden_prev,
            cell_t_prev=cell_prev)
        self.assertIsNotNone(unit_result)
    # Dump the constructed program for inspection.
    print(str(main_program))
def test_lstm_unit(self):
    """Smoke-test ``layers.lstm_unit``: the constructed step must not be None.

    The step input and both previous states are created as ``layers.data``
    placeholders and projected with ``layers.fc`` inside a dedicated
    ``Program`` before the LSTM unit is built.
    """
    prog = Program()
    with program_guard(prog):
        raw_input = layers.data(
            name='x_t_data', shape=[10, 10], dtype='float32')
        projected_input = layers.fc(input=raw_input, size=10)

        raw_hidden = layers.data(
            name='prev_hidden_data', shape=[10, 30], dtype='float32')
        projected_hidden = layers.fc(input=raw_hidden, size=30)

        raw_cell = layers.data(
            name='prev_cell', shape=[10, 30], dtype='float32')
        projected_cell = layers.fc(input=raw_cell, size=30)

        lstm_step = layers.lstm_unit(
            x_t=projected_input,
            hidden_t_prev=projected_hidden,
            cell_t_prev=projected_cell)
        self.assertIsNotNone(lstm_step)
    # Print the textual form of the program that was just built.
    print(str(prog))
def call(self, global_img_feat, p_img_feat, embedding_fn, words=None):
    """Build the step-by-step decoding graph with a sentinel-augmented attention.

    An LSTM unit is unrolled inside a ``layers.While`` loop. At every step the
    current word embedding is concatenated with ``global_img_feat``, fed to
    ``layers.lstm_unit``, and the result is combined with an attention over
    the projected image features plus one extra "sentinel" slot.

    Args:
        global_img_feat: Tensor concatenated with the word embedding at each
            step; also passed to ``create_zero_state`` for the initial hidden
            and cell states, and used as the batch-size reference for the
            start word in ``'eval'`` mode.
        p_img_feat: Tensor projected by two ``fc`` layers with
            ``num_flatten_dims=2``; the inline notes mark the result as
            ``[batch, k, hid]``.
        embedding_fn: Callable applied to the current word ids to obtain
            their embedding.
        words: Used whenever ``self.mode`` is not ``'eval'``. It is
            transposed with ``[1, 0]`` (marked ``[seq, batch]`` afterwards),
            and one row is sliced per step in ``'train'`` mode.

    Returns:
        In ``'train'`` mode, the stacked per-step ``word_pred`` tensors
        transposed with ``[1, 0, 2]``. Otherwise, the stacked per-step
        outputs transposed with ``[1, 0]``.

    Note:
        Only ``'train'`` and ``'eval'`` assign ``word`` before
        ``embedding_fn(word)`` is called; any other mode reaches that call
        with ``word`` unbound.
    """
    # Image features
    img_feat = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2, act='tanh')  # [batch, k, hid]
    img_feat_emb = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2)

    if self.mode == 'eval':
        # Start every sequence from the configured start index.
        word = layers.fill_constant_batch_size_like(global_img_feat, [-1],
                                                    dtype='int64',
                                                    value=config.data['start_idx'])
    else:
        words = layers.transpose(words, [1, 0])  # [seq, batch]
        words.stop_gradient = True
    # LSTM initialization
    hid, cell = create_zero_state(global_img_feat), create_zero_state(global_img_feat)

    # While-loop parameter initialization
    mx = decoder_config['sentence_length'] - 1 if self.mode == 'train' else decoder_config['infer_max_length']
    if self.mode == 'eval':
        # NOTE(review): this reassignment repeats the value the conditional
        # expression above already selected for non-train modes.
        mx = decoder_config['infer_max_length']
        # NOTE(review): the array is created as 'int64', but the eval branch
        # inside the loop writes values cast to 'float32' -- confirm this is
        # intended.
        while_op_output = layers.create_array('int64')
    else:
        while_op_output = layers.create_array('float32')
    max_step = layers.fill_constant(shape=[1], dtype='int64', value=mx)
    step = layers.fill_constant(shape=[1], dtype='int64', value=0)
    cond = layers.less_than(step, max_step)
    while_op = layers.While(cond)

    with while_op.block():
        if self.mode == 'train':
            # Pick the ground-truth word of the current step.
            st = layers.cast(step, 'int32')
            word = layers.slice(words, axes=[0], starts=st, ends=st + 1)
            word = layers.squeeze(word, [0])
            word.stop_gradient = True

        word_emb = embedding_fn(word)
        # Maybe using + (addition) instead of concat would work better here?
        xt = layers.concat([word_emb, global_img_feat], axis=-1)  # [batch, feat]
        h, c = layers.lstm_unit(xt, hid, cell, param_attr=fluid.ParamAttr('lstm_w'),
                                bias_attr=fluid.ParamAttr('lstm_b'))
        # Sentinel gate built from the step input and the hidden state from
        # before this step (``hid`` is only overwritten further below).
        p_word_emb = layers.fc(xt, size=self.hid_size)
        p_hidden = layers.fc(hid, size=self.hid_size)
        sentinel_gate = layers.sigmoid(p_word_emb + p_hidden)  # [batch, hidden]
        sentinel = layers.elementwise_mul(sentinel_gate, layers.tanh(c))  # [batch, hidden]

        # Carry the new states over to the next iteration.
        layers.assign(h, hid)
        layers.assign(c, cell)

        k = layers.shape(p_img_feat)[1]

        p_hid = layers.fc(h, self.hid_size, act='tanh')
        # Attention part
        # alpha
        hid_emb = layers.fc(p_hid, self.hid_size)  # [batch, hidden]
        exp_hid_emb = layers.expand(layers.unsqueeze(hid_emb, 1), [1, k + 1, 1])  # [batch, k+1, hidden]
        sentinel_emb = layers.unsqueeze(layers.fc(sentinel, self.hid_size), axes=1)  # [batch, 1, hidden]
        feat_emb = layers.concat([img_feat_emb, sentinel_emb], axis=1)  # [batch, k+1, hidden]
        z = layers.tanh(feat_emb + exp_hid_emb)  # original note: [batch, k+1, 1]; operands above are marked [batch, k+1, hidden] -- TODO confirm
        # NOTE(review): softmax is requested as the activation of an fc layer
        # with size=1; if it normalizes over the last axis (length 1) every
        # weight would be 1 -- confirm it normalizes over the k+1 axis.
        alpha = layers.fc(z, size=1, num_flatten_dims=2, act='softmax')  # [batch, k+1, 1]

        # context vector
        context = layers.concat([img_feat, layers.unsqueeze(sentinel, axes=1)], axis=1)  # [batch, k+1, hidden]
        context = layers.elementwise_mul(context, alpha, axis=0)
        context = layers.reduce_mean(context, dim=1)  # [batch, hidden]

        out = layers.fc(context + p_hid, self.hid_size, act='tanh')

        word_pred = weight_tying_fc(out)  # [batch, vocab]

        if self.mode == 'eval':
            # Feed the arg-max word back as the next step's input and record it.
            next_word = layers.argmax(word_pred, axis=-1)
            layers.assign(next_word, word)
            next_word = layers.cast(next_word, 'float32')
            layers.array_write(next_word, step, array=while_op_output)
        else:
            layers.array_write(word_pred, step, array=while_op_output)
        # Advance the step counter and refresh the loop condition in place.
        layers.increment(step)
        layers.less_than(step, max_step, cond=cond)
    if self.mode == 'train':
        output_time_major, _ = layers.tensor_array_to_tensor(while_op_output, axis=0, use_stack=True)
        output = layers.transpose(output_time_major, [1, 0, 2])
    else:
        output_time_major = layers.tensor_array_to_tensor(while_op_output, axis=0, use_stack=True)[0]
        output = layers.transpose(output_time_major, [1, 0])
    return output
def test_pre_cell_type():
    """Call ``lstm_unit`` with an int32 tensor as the previous cell state.

    ``batch_size``, ``hidden_dim``, ``inputs`` and ``pre_hidden`` come from
    the enclosing scope. Presumably the caller expects this call to raise;
    the assertion is not visible in this block.
    """
    int32_pre_cell = fluid.data(
        name='error_pre_cell',
        shape=[batch_size, hidden_dim],
        dtype='int32',
    )
    lstm_unit(inputs, pre_hidden, int32_pre_cell)
def test_input_type():
    """Call ``lstm_unit`` with an int32 tensor as the step input.

    ``batch_size``, ``emb_dim``, ``pre_hidden`` and ``pre_cell`` come from
    the enclosing scope. Presumably the caller expects this call to raise;
    the assertion is not visible in this block.
    """
    int32_input = fluid.data(
        name='error_input',
        shape=[batch_size, emb_dim],
        dtype='int32',
    )
    lstm_unit(int32_input, pre_hidden, pre_cell)
def test_pre_cell_Variable():
    """Call ``lstm_unit`` with ``np_pre_cell`` as the previous cell state.

    All three arguments come from the enclosing scope. ``np_pre_cell`` is
    presumably a NumPy array rather than a framework variable -- confirm
    against the surrounding test.
    """
    lstm_unit(
        inputs,
        pre_hidden,
        np_pre_cell,
    )
def test_input_Variable():
    """Call ``lstm_unit`` with ``np_input`` as the step input.

    All three arguments come from the enclosing scope. ``np_input`` is
    presumably a NumPy array rather than a framework variable -- confirm
    against the surrounding test.
    """
    lstm_unit(
        np_input,
        pre_hidden,
        pre_cell,
    )