def attn_flow(q_enc, p_enc, p_ids_name, args):
    """Bidirectional Attention layer"""
    tag = p_ids_name + "__"
    drnn = layers.DynamicRNN()
    with drnn.block():
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_mul = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t_sum = layers.reduce_sum(input=s_t_mul, dim=1, keep_dim=True)
        s_t_re = layers.reshape(s_t_sum, shape=[-1, 0])
        s_t = layers.sequence_softmax(input=s_t_re)
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

        b_t = layers.sequence_pool(input=s_t_sum, pool_type='max')
        drnn.output(u_expr, b_t)
    U_expr, b = drnn()
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=p_enc, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=p_enc, y=H_expr, axis=0)

    g = layers.concat(input=[p_enc, U_expr, h_u, h_h], axis=1)
    return dropout(g, args)
def custom_dynamic_rnn(p_vec, init_state, decoder_size):
    context = layers.fc(input=p_vec, size=decoder_size, act=None)

    drnn = layers.DynamicRNN()
    with drnn.block():
        H_s = drnn.step_input(p_vec)
        ctx = drnn.static_input(context)

        c_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
        m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

        Fk = ctx + m_prev1
        Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
        logits = layers.fc(input=Fk, size=1, act=None)

        scores = layers.sequence_softmax(input=logits)
        attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
        attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

        hidden_t, cell_t = lstm_step(
            attn_ctx,
            hidden_t_prev=m_prev1,
            cell_t_prev=c_prev,
            size=decoder_size)
        drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
        drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

        drnn.output(scores)
    beta = drnn()
    return beta
def exp_sequence_expand():
    # x = fluid.data(name='x', shape=[1], dtype='float32')
    # y = fluid.data(name='y', shape=[1], dtype='float32', lod_level=1)
    # out = layers.sequence_expand(x=x, y=y, ref_level=0)
    x = fluid.data(name='x', shape=[4, 1], dtype='float32')
    y = fluid.data(name='y', shape=[8, 1], dtype='float32', lod_level=1)
    out = layers.sequence_expand(x=x, y=y, ref_level=0)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    np_data = np.array([[1], [2], [3], [4]]).astype('float32')
    x_lod_tensor = fluid.create_lod_tensor(np_data, [[2, 2]], place)
    print(x_lod_tensor)

    y_lod_tensor = fluid.create_random_int_lodtensor(
        [[2, 2], [3, 3, 1, 1]], [1], place, low=0, high=1)
    y_lod_tensor2 = fluid.create_random_int_lodtensor(
        [[2, 2], [3, 3, 1, 1]], [9, 16], place, low=0, high=1)
    print(y_lod_tensor)
    print(y_lod_tensor2)
    # lod: [[0, 2, 4], [0, 3, 6, 7, 8]]
    # dim: 8, 1
    # layout: NCHW
    # dtype: int64_t
    # data: [0 0 1 1 1 1 1 0]

    out_main = exe.run(fluid.default_main_program(),
                       feed={'x': x_lod_tensor, 'y': y_lod_tensor},
                       fetch_list=[out],
                       return_numpy=False)
    print(out_main[0])
def test_sequence_expand(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[10], dtype='float32')
        y = layers.data(
            name='y', shape=[10, 20], dtype='float32', lod_level=2)
        self.assertIsNotNone(layers.sequence_expand(x=x, y=y, ref_level=1))
    print(str(program))
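The snippets above exercise sequence_expand mostly inside larger models or tests, so here is a minimal hedged sketch of its core semantics with concrete numbers. It assumes the same fluid 1.x API used above; the tensor values and LoD lengths are illustrative only, and the expected output is what the op should produce if it repeats each row of x according to y's LoD at ref_level.

# Hedged sketch (not from the original sources): sequence_expand repeats
# row i of a plain-tensor x as many times as the length of y's i-th
# sequence at ref_level. All values below are made up for illustration.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

x = fluid.data(name='x', shape=[3, 1], dtype='float32')
y = fluid.data(name='y', shape=[5, 1], dtype='float32', lod_level=1)
out = layers.sequence_expand(x=x, y=y, ref_level=0)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

x_np = np.array([[1.], [2.], [3.]], dtype='float32')
# y carries only the LoD that drives the expansion; its values are unused.
y_lod = fluid.create_lod_tensor(
    np.zeros((5, 1), dtype='float32'), [[2, 1, 2]], place)

res = exe.run(fluid.default_main_program(),
              feed={'x': x_np, 'y': y_lod},
              fetch_list=[out],
              return_numpy=False)
# Expected under the documented behavior: row 0 twice, row 1 once,
# row 2 twice -> [[1.], [1.], [2.], [3.], [3.]]
print(np.array(res[0]))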
def static_rnn(step, p_vec=p_vec, init_state=None, para_name='', args=args):
    tag = para_name + "static_rnn_"
    ctx = layers.fc(
        input=p_vec,
        param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
        size=hidden_size,
        act=None)
    beta = []
    c_prev = init_state
    m_prev = init_state
    for i in range(step):
        m_prev0 = layers.fc(
            input=m_prev,
            size=hidden_size,
            act=None,
            param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
            bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
        m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

        Fk = ctx + m_prev1
        Fk = layers.tanh(Fk)
        logits = layers.fc(
            input=Fk,
            size=1,
            act=None,
            param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
            bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

        scores = layers.sequence_softmax(input=logits)
        attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
        attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

        hidden_t, cell_t = lstm_step(
            attn_ctx,
            hidden_t_prev=m_prev,
            cell_t_prev=c_prev,
            size=hidden_size,
            para_name=tag,
            args=args)
        m_prev = hidden_t
        c_prev = cell_t
        beta.append(scores)
    return beta
def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                     decoder_size):
    decoder_state_proj = layers.fc(input=decoder_state,
                                   size=decoder_size,
                                   bias_attr=False,
                                   name="decoder_state_proj_fc")
    decoder_state_expand = layers.sequence_expand(
        x=decoder_state_proj, y=encoder_proj)
    concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
    concated = layers.tanh(x=concated)
    attention_weights = layers.fc(input=concated,
                                  size=1,
                                  act=None,
                                  bias_attr=False,
                                  name="attention_weights_fc")
    attention_weights = layers.sequence_softmax(input=attention_weights)
    weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
    scaled = layers.elementwise_mul(
        x=encoder_vec, y=weights_reshape, axis=0)
    context = layers.sequence_pool(input=scaled, pool_type='sum')
    return context
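The weighted-sum idiom inside simple_attention (per-sequence sequence_softmax, elementwise_mul with axis=0, then sequence_pool 'sum') recurs in most snippets here, so the following hedged sketch isolates it with hypothetical dimensions. It is not part of the original model code and only assumes the fluid layers API already used above.

# Hedged sketch: attention pooling over a LoD-batched encoder output.
# 'enc', 'hidden', and the LoD lengths are hypothetical names/values.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

hidden = 8
enc = fluid.data(name='enc', shape=[None, hidden], dtype='float32',
                 lod_level=1)
# One unnormalized score per encoder time step.
logits = layers.fc(input=enc, size=1, act=None)
# Softmax is taken within each sequence, as defined by enc's LoD.
weights = layers.sequence_softmax(input=logits)
weights = layers.reshape(x=weights, shape=[-1])
# Broadcast each scalar weight over its feature row (axis=0), then sum
# the weighted rows of every sequence into one context vector per sequence.
scaled = layers.elementwise_mul(x=enc, y=weights, axis=0)
context = layers.sequence_pool(input=scaled, pool_type='sum')

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

enc_np = np.random.rand(5, hidden).astype('float32')
# Two sequences of lengths 2 and 3 -> context should have shape [2, hidden].
enc_lod = fluid.create_lod_tensor(enc_np, [[2, 3]], place)
ctx = exe.run(fluid.default_main_program(),
              feed={'enc': enc_lod},
              fetch_list=[context])
print(ctx[0].shape)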
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(input=pre_ids,
                                   size=[dict_size, word_dim],
                                   dtype='float32',
                                   is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)

        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=50)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores
    return translation_ids, translation_scores
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the recursive_sequence_lengths of pre_state
        # to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)

        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)

        # calculate accumulated scores after topk to reduce computation cost
        accu_scores = pd.elementwise_add(
            x=pd.log(topk_scores),
            y=pd.reshape(pre_score, shape=[-1]),
            axis=0)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids,
            pre_score,
            topk_indices,
            accu_scores,
            beam_size,
            end_id=10,
            level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        # update the break condition: up to the max length or all candidates
        # of source sentences have ended.
        length_cond = pd.less_than(x=counter, y=array_len)
        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

    # return init_ids, init_scores
    return translation_ids, translation_scores
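The accu_scores line above adds each branch's accumulated log-probability to the log of its top-k candidate probabilities before beam_search. The following hedged numpy sketch shows that arithmetic in isolation; the numbers and the two-branch setup are made up for illustration and are not taken from the model above.

# Hedged numpy sketch of the accumulated-score update used in decoder_decode.
import numpy as np

# pre_score: accumulated log-probability of each live branch (2 branches here).
pre_score = np.array([-0.5, -1.2], dtype='float32')
# topk_scores: softmax probabilities of the top-k next words per branch.
topk_scores = np.array([[0.6, 0.3, 0.1],
                        [0.5, 0.4, 0.1]], dtype='float32')
# elementwise_add(x=log(topk_scores), y=reshape(pre_score, [-1]), axis=0)
# broadcasts each branch's accumulated score over its k candidates.
accu_scores = np.log(topk_scores) + pre_score[:, None]
print(accu_scores)  # shape (2, 3): the candidate scores fed to beam_search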
def beam_search():
    max_len = layers.fill_constant(
        shape=[1], dtype=start_tokens.dtype, value=max_out_len)
    step_idx = layers.fill_constant(
        shape=[1], dtype=start_tokens.dtype, value=0)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    # array states will be stored for each step.
    ids = layers.array_write(start_tokens, step_idx)
    scores = layers.array_write(init_scores, step_idx)
    # cell states will be overwritten at each step.
    # caches contains states of history steps to reduce redundant
    # computation in decoder.
    caches = [{
        "k": layers.fill_constant_batch_size_like(
            input=start_tokens,
            shape=[-1, 0, d_model],
            dtype=enc_output.dtype,
            value=0),
        "v": layers.fill_constant_batch_size_like(
            input=start_tokens,
            shape=[-1, 0, d_model],
            dtype=enc_output.dtype,
            value=0)
    } for i in range(n_layer)]
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_scores = layers.array_read(array=scores, i=step_idx)
        # sequence_expand can gather sequences according to lod thus can be
        # used in beam search to sift states corresponding to selected ids.
        pre_src_attn_bias = layers.sequence_expand(
            x=trg_src_attn_bias, y=pre_scores)
        pre_enc_output = layers.sequence_expand(x=enc_output, y=pre_scores)
        pre_caches = [{
            "k": layers.sequence_expand(x=cache["k"], y=pre_scores),
            "v": layers.sequence_expand(x=cache["v"], y=pre_scores),
        } for cache in caches]
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_enc_output,  # can't use pre_ids here since it has lod
                value=1,
                shape=[-1, 1],
                dtype=pre_ids.dtype),
            y=layers.increment(x=step_idx, value=1.0, in_place=False),
            axis=0)
        logits = wrap_decoder(
            trg_vocab_size,
            max_in_len,
            n_layer,
            n_head,
            d_key,
            d_value,
            d_model,
            d_inner_hid,
            dropout_rate,
            weight_sharing,
            dec_inputs=(pre_ids, pre_pos, None, pre_src_attn_bias,
                        trg_data_shape, slf_attn_pre_softmax_shape,
                        slf_attn_post_softmax_shape,
                        src_attn_pre_softmax_shape,
                        src_attn_post_softmax_shape),
            enc_output=pre_enc_output,
            caches=pre_caches)
        topk_scores, topk_indices = layers.topk(
            input=layers.softmax(logits), k=beam_size)
        accu_scores = layers.elementwise_add(
            x=layers.log(topk_scores),
            y=layers.reshape(pre_scores, shape=[-1]),
            axis=0)
        # beam_search op uses lod to distinguish branches.
        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        selected_ids, selected_scores = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=beam_size,
            end_id=eos_idx)

        layers.increment(x=step_idx, value=1.0, in_place=True)
        # update states
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.assign(pre_src_attn_bias, trg_src_attn_bias)
        layers.assign(pre_enc_output, enc_output)
        for i in range(n_layer):
            layers.assign(pre_caches[i]["k"], caches[i]["k"])
            layers.assign(pre_caches[i]["v"], caches[i]["v"])
        layers.assign(
            layers.elementwise_add(
                x=slf_attn_pre_softmax_shape,
                y=attn_pre_softmax_shape_delta),
            slf_attn_pre_softmax_shape)
        layers.assign(
            layers.elementwise_add(
                x=slf_attn_post_softmax_shape,
                y=attn_post_softmax_shape_delta),
            slf_attn_post_softmax_shape)
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=beam_size, end_id=eos_idx)
    return finished_ids, finished_scores
def gru_attention_infer(self, decoder_boot, max_length, char_num,
                        word_vector_dim, encoded_vector, encoded_proj,
                        decoder_size):
    init_state = decoder_boot
    beam_size = 1
    array_len = layers.fill_constant(
        shape=[1], dtype='int64', value=max_length)
    counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = layers.create_array('float32')
    layers.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = layers.create_array('int64')
    scores_array = layers.create_array('float32')
    rois_shape = layers.shape(init_state)
    batch_size = layers.slice(
        rois_shape, axes=[0], starts=[0], ends=[1]) + 1
    lod_level = layers.range(
        start=0, end=batch_size, step=1, dtype=batch_size.dtype)

    init_ids = layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], value=0, dtype='int64')
    init_ids = layers.lod_reset(init_ids, lod_level)
    init_ids = layers.lod_append(init_ids, lod_level)
    init_scores = layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], value=1, dtype='float32')
    init_scores = layers.lod_reset(init_scores, init_ids)
    layers.array_write(init_ids, array=ids_array, i=counter)
    layers.array_write(init_scores, array=scores_array, i=counter)

    full_ids = fluid.layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], dtype='int64', value=1)

    cond = layers.less_than(x=counter, y=array_len)
    while_op = layers.While(cond=cond)
    with while_op.block():
        pre_ids = layers.array_read(array=ids_array, i=counter)
        pre_state = layers.array_read(array=state_array, i=counter)
        pre_score = layers.array_read(array=scores_array, i=counter)
        pre_ids_emb = layers.embedding(
            input=pre_ids,
            size=[char_num, word_vector_dim],
            dtype='float32')

        context = self.simple_attention(encoded_vector, encoded_proj,
                                        pre_state, decoder_size)

        # expand the recursive_sequence_lengths of pre_state
        # to be the same as pre_score
        pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
        context_expanded = layers.sequence_expand(context, pre_score)

        fc_1 = layers.fc(input=context_expanded,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc1")
        fc_2 = layers.fc(input=pre_ids_emb,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc2")
        decoder_inputs = fc_1 + fc_2

        current_state, _, _ = layers.gru_unit(
            input=decoder_inputs,
            hidden=pre_state_expanded,
            size=decoder_size * 3)
        current_state_with_lod = layers.lod_reset(
            x=current_state, y=pre_score)

        # use score to do beam search
        current_score = layers.fc(input=current_state_with_lod,
                                  size=char_num,
                                  bias_attr=True,
                                  act='softmax',
                                  name="rnn_out_fc")
        topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

        new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
        fluid.layers.assign(new_ids, full_ids)

        layers.increment(x=counter, value=1, in_place=True)

        # update the memories
        layers.array_write(current_state, array=state_array, i=counter)
        layers.array_write(topk_indices, array=ids_array, i=counter)
        layers.array_write(topk_scores, array=scores_array, i=counter)

        # update the break condition:
        # up to the max length or all candidates of
        # source sentences have ended.
        length_cond = layers.less_than(x=counter, y=array_len)
        finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)
    return full_ids
def attn_flow(q_enc, p_enc, p_ids_name):
    tag = p_ids_name + "::"
    drnn = layers.DynamicRNN()
    with drnn.block():
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t1 = layers.reduce_sum(input=s_t_, dim=1)
        s_t = layers.sequence_softmax(input=s_t1)
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')
        if args.debug == True:
            '''
            layers.Print(h_expd, message='h_expd')
            layers.Print(h_cur, message='h_cur')
            layers.Print(u_all, message='u_all')
            layers.Print(s_t, message='s_t')
            layers.Print(s_t_, message='s_t_')
            layers.Print(u_expr, message='u_expr')
            '''
        drnn.output(u_expr)
    U_expr = drnn()

    drnn2 = layers.DynamicRNN()
    with drnn2.block():
        h_cur = drnn2.step_input(p_enc)
        u_all = drnn2.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t2 = layers.reduce_sum(input=s_t_, dim=1, keep_dim=True)
        b_t = layers.sequence_pool(input=s_t2, pool_type='max')
        if args.debug == True:
            '''
            layers.Print(s_t2, message='s_t2')
            layers.Print(b_t, message='b_t')
            '''
        drnn2.output(b_t)
    b = drnn2()

    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=H_expr, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=H_expr, y=p_enc, axis=0)
    g = layers.concat(input=[H_expr, U_expr, h_u, h_h], axis=1)

    # fusion
    m = bi_lstm_encoder(input_seq=g, gate_size=embedding_dim)
    if args.debug == True:
        layers.Print(U_expr, message=tag + 'U_expr')
        layers.Print(H_expr, message=tag + 'H_expr')
        layers.Print(b, message=tag + 'b')
        layers.Print(b_norm, message=tag + 'b_norm')
        layers.Print(g, message=tag + 'g')
        layers.Print(m, message=tag + 'm')
        layers.Print(h_h, message=tag + 'h_h')
        layers.Print(q_enc, message=tag + 'q_enc')
        layers.Print(p_enc, message=tag + 'p_enc')
    return m, g
def point_network_decoder(p_vec, q_vec, decoder_size):
    random_attn = layers.gaussian_random(shape=[1, decoder_size])
    random_attn = layers.sequence_expand(x=random_attn, y=q_vec)
    random_attn = layers.fc(input=random_attn, size=decoder_size, act=None)
    U = layers.fc(input=q_vec, size=decoder_size, act=None) + random_attn
    U = layers.tanh(U)
    logits = layers.fc(input=U, size=1, act=None)
    scores = layers.sequence_softmax(input=logits)
    pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
    pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')

    init_state = layers.fc(input=pooled_vec, size=decoder_size, act=None)

    def custom_dynamic_rnn(p_vec, init_state, decoder_size):
        context = layers.fc(input=p_vec, size=decoder_size, act=None)

        drnn = layers.DynamicRNN()
        with drnn.block():
            H_s = drnn.step_input(p_vec)
            ctx = drnn.static_input(context)

            c_prev = drnn.memory(init=init_state, need_reorder=True)
            m_prev = drnn.memory(init=init_state, need_reorder=True)
            m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
            m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

            Fk = ctx + m_prev1
            Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
            logits = layers.fc(input=Fk, size=1, act=None)

            scores = layers.sequence_softmax(input=logits)
            attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
            attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

            hidden_t, cell_t = lstm_step(
                attn_ctx,
                hidden_t_prev=m_prev1,
                cell_t_prev=c_prev,
                size=decoder_size)
            drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
            drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

            drnn.output(scores)
        beta = drnn()
        return beta

    fw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)
    bw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)

    def sequence_slice(x, index):
        #offset = layers.fill_constant(shape=[1, args.batch_size], value=index, dtype='float32')
        #length = layers.fill_constant(shape=[1, args.batch_size], value=1, dtype='float32')
        #return layers.sequence_slice(x, offset, length)
        idx = layers.fill_constant(shape=[1], value=1, dtype='int32')
        idx.stop_gradient = True
        from paddle.fluid.layers.control_flow import lod_rank_table
        from paddle.fluid.layers.control_flow import lod_tensor_to_array
        from paddle.fluid.layers.control_flow import array_read
        from paddle.fluid.layers.control_flow import array_to_lod_tensor
        table = lod_rank_table(x, level=0)
        table.stop_gradient = True
        array = lod_tensor_to_array(x, table)
        slice_array = array_read(array=array, i=idx)
        return array_to_lod_tensor(slice_array, table)

    start_prob = layers.elementwise_mul(
        x=sequence_slice(fw_outputs, 0),
        y=sequence_slice(bw_outputs, 1),
        axis=0) / 2
    end_prob = layers.elementwise_mul(
        x=sequence_slice(fw_outputs, 1),
        y=sequence_slice(bw_outputs, 0),
        axis=0) / 2

    return start_prob, end_prob
def decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)

        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores
    return translation_ids, translation_scores