def concat_node_edge_feat(node_feat, edge_feat, nlod, elod):
    """Stack node features on top of edge features as one LoD matrix.

    Args:
        node_feat(Variable): node features, shape (num_nodes, feature_size).
        edge_feat(Variable): edge features, shape (num_edges, feature_size).
        nlod(Variable): graph LoD index for the node items.
        elod(Variable): graph LoD index for the edge items.

    Returns:
        Variable: node-edge feature matrix with shape
            (num_nodes + num_edges, feature_size).
    """
    # Attach the per-graph LoD to each feature tensor, then let
    # sequence_concat interleave them graph by graph.
    feats_with_lod = [
        L.lod_reset(node_feat, nlod),
        L.lod_reset(edge_feat, elod),
    ]
    return L.sequence_concat(feats_with_lod)
def fluid_sequence_advance(input, OOV):
    """Shift every sequence right by one step, filling the head with OOV.

    Example (with OOV == 0):
        input.data  = [1,2,3, 4,5]   input.lod  = [[0, 3, 5]]
        output.data = [0,1,2, 0,4]   output.lod = [[0, 3, 5]]
    """
    seq_len = fluid_sequence_get_seq_len(input)

    def batch_const(v):
        # One int64 constant per sequence in the batch.
        return layers.fill_constant_batch_size_like(
            seq_len, shape=[-1, 1], value=v, dtype='int64')

    starts = batch_const(0)
    unit_len = batch_const(1)
    # Build a one-step OOV prefix shaped like the first element of each
    # sequence (slice * 0 keeps shape/LoD, + OOV sets the value).
    head_oov = layers.sequence_slice(input, starts, unit_len) * 0 + OOV
    head_oov.stop_gradient = True
    padded = layers.sequence_concat([head_oov, input])
    # Re-slice at offset 0 with the original lengths: last element drops off.
    return layers.sequence_slice(padded, starts, seq_len)
def fluid_sequence_delay2(input, seq_len, OOV):
    """Drop the first element of each sequence and append OOV at the tail.

    Args:
        input: 1-level LoDTensor.
        seq_len: per-sequence lengths (one entry per sequence in the batch).
        OOV: fill value for the vacated final position.
    """
    # seq_len * 0 + OOV yields a one-step OOV tensor per sequence,
    # cast to match the input dtype.
    tail_oov = layers.cast(seq_len * 0 + OOV, input.dtype)
    tail_oov.stop_gradient = True
    padded = layers.sequence_concat([input, tail_oov])
    unit_offset = layers.fill_constant_batch_size_like(
        seq_len, shape=[-1, 1], value=1, dtype='int64')
    # Slice from offset 1 with the original lengths: element 0 is dropped
    # and the appended OOV fills the end.
    return layers.sequence_slice(padded, unit_offset,
                                 layers.cast(seq_len, 'int64'))
def fluid_sequence_delay(input, OOV):
    """Drop the first element of each sequence and append OOV at the tail.

    Same operation as ``fluid_sequence_delay2`` but derives the sequence
    lengths from ``input`` itself.

    Args:
        input: 1-level LoDTensor.
        OOV: fill value for the vacated final position.
    """
    seq_len = fluid_sequence_get_seq_len(input)

    def batch_const(v):
        # One int64 constant per sequence in the batch.
        return layers.fill_constant_batch_size_like(
            seq_len, shape=[-1, 1], value=v, dtype='int64')

    zero_offsets = batch_const(0)
    unit_lengths = batch_const(1)
    # One-step OOV tensor per sequence, shaped like the first element.
    tail_oov = layers.sequence_slice(input, zero_offsets, unit_lengths) * 0 + OOV
    tail_oov.stop_gradient = True
    padded = layers.sequence_concat([input, tail_oov])
    # Offset 1 + original lengths: skips element 0, includes the OOV tail.
    return layers.sequence_slice(padded, unit_lengths, seq_len)
def bidaf(embedding_dim, encoder_size, decoder_size, source_dict_dim,
          target_dict_dim, max_length, args):
    """Build a BiDAF-style machine-reading-comprehension network.

    Args:
        embedding_dim: token embedding size; also used as the LSTM gate size.
        encoder_size: not referenced in this implementation (kept for
            interface compatibility).
        decoder_size: hidden size of the pointer-network decoder.
        source_dict_dim: vocabulary size for the embedding lookup.
        target_dict_dim: not referenced here (kept for interface compatibility).
        max_length: not referenced here (kept for interface compatibility).
        args: runtime options; fields read here: debug, single_doc, doc_num,
            simple_decode, batch_size.

    Returns:
        (cost, feeding_list): scalar training loss and the ordered list of
        feed variable names.
    """

    def bi_lstm_encoder(input_seq, gate_size):
        # A bi-directional LSTM encoder.
        # The linear transformations for the input/output/forget gates and
        # cell activation are done outside dynamic_lstm, hence size is
        # 4 * gate_size.
        input_forward_proj = layers.fc(input=input_seq,
                                       size=gate_size * 4,
                                       act='tanh',
                                       bias_attr=False)
        forward, _ = layers.dynamic_lstm(
            input=input_forward_proj, size=gate_size * 4, use_peepholes=False)
        input_reversed_proj = layers.fc(input=input_seq,
                                        size=gate_size * 4,
                                        act='tanh',
                                        bias_attr=False)
        # Renamed from `reversed` to avoid shadowing the builtin.
        backward, _ = layers.dynamic_lstm(
            input=input_reversed_proj,
            size=gate_size * 4,
            is_reverse=True,
            use_peepholes=False)
        return layers.concat(input=[forward, backward], axis=1)

    def encoder(input_name):
        # Embed a LoD id sequence and run it through the bi-LSTM encoder.
        input_ids = layers.data(
            name=input_name, shape=[1], dtype='int64', lod_level=1)
        input_embedding = layers.embedding(
            input=input_ids,
            size=[source_dict_dim, embedding_dim],
            dtype='float32',
            is_sparse=True)
        return bi_lstm_encoder(input_seq=input_embedding,
                               gate_size=embedding_dim)

    def attn_flow(q_enc, p_enc, p_ids_name):
        # Bi-directional attention flow between question (q_enc) and
        # passage (p_enc). Returns the fused representation m and the
        # concatenated attention features g.
        tag = p_ids_name + "::"
        # Context-to-query attention: one attended query vector per
        # passage position.
        drnn = layers.DynamicRNN()
        with drnn.block():
            h_cur = drnn.step_input(p_enc)
            u_all = drnn.static_input(q_enc)
            h_expd = layers.sequence_expand(x=h_cur, y=u_all)
            s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
            s_t1 = layers.reduce_sum(input=s_t_, dim=1)
            s_t = layers.sequence_softmax(input=s_t1)
            u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
            u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')
            drnn.output(u_expr)
        U_expr = drnn()

        # Query-to-context attention: per-passage-position max similarity.
        drnn2 = layers.DynamicRNN()
        with drnn2.block():
            h_cur = drnn2.step_input(p_enc)
            u_all = drnn2.static_input(q_enc)
            h_expd = layers.sequence_expand(x=h_cur, y=u_all)
            s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
            s_t2 = layers.reduce_sum(input=s_t_, dim=1, keep_dim=True)
            b_t = layers.sequence_pool(input=s_t2, pool_type='max')
            drnn2.output(b_t)
        b = drnn2()

        b_norm = layers.sequence_softmax(input=b)
        h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
        h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')
        H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
        H_expr = layers.lod_reset(x=H_expr, y=p_enc)
        h_u = layers.elementwise_mul(x=H_expr, y=U_expr, axis=0)
        h_h = layers.elementwise_mul(x=H_expr, y=p_enc, axis=0)
        g = layers.concat(input=[H_expr, U_expr, h_u, h_h], axis=1)
        # Fusion layer.
        m = bi_lstm_encoder(input_seq=g, gate_size=embedding_dim)
        if args.debug:
            layers.Print(U_expr, message=tag + 'U_expr')
            layers.Print(H_expr, message=tag + 'H_expr')
            layers.Print(b, message=tag + 'b')
            layers.Print(b_norm, message=tag + 'b_norm')
            layers.Print(g, message=tag + 'g')
            layers.Print(m, message=tag + 'm')
            layers.Print(h_h, message=tag + 'h_h')
            layers.Print(q_enc, message=tag + 'q_enc')
            layers.Print(p_enc, message=tag + 'p_enc')
        return m, g

    def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
        # One manual LSTM step used by the pointer-network decoder.
        # Note: each gate has its own fc, i.e. independent parameters.
        def linear(inputs):
            return layers.fc(input=inputs, size=size, bias_attr=True)

        forget_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
        input_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
        output_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
        cell_tilde = layers.tanh(x=linear([hidden_t_prev, x_t]))

        cell_t = layers.sums(input=[
            layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
            layers.elementwise_mul(x=input_gate, y=cell_tilde)
        ])
        hidden_t = layers.elementwise_mul(x=output_gate,
                                          y=layers.tanh(x=cell_t))
        return hidden_t, cell_t

    # Pointer network decoder.
    def point_network_decoder(p_vec, q_vec, decoder_size):
        # Initial decoder state: attention-pooled question vector,
        # perturbed by a random query (gaussian_random).
        random_attn = layers.gaussian_random(shape=[1, decoder_size])
        random_attn = layers.sequence_expand(x=random_attn, y=q_vec)
        random_attn = layers.fc(input=random_attn, size=decoder_size, act=None)
        U = layers.fc(input=q_vec, size=decoder_size, act=None) + random_attn
        U = layers.tanh(U)
        logits = layers.fc(input=U, size=1, act=None)
        scores = layers.sequence_softmax(input=logits)
        pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
        pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')
        init_state = layers.fc(input=pooled_vec, size=decoder_size, act=None)

        def custom_dynamic_rnn(p_vec, init_state, decoder_size):
            context = layers.fc(input=p_vec, size=decoder_size, act=None)
            drnn = layers.DynamicRNN()
            with drnn.block():
                # Step input drives the RNN iteration; its value is unused.
                H_s = drnn.step_input(p_vec)
                ctx = drnn.static_input(context)
                c_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
                m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)
                Fk = ctx + m_prev1
                Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
                logits = layers.fc(input=Fk, size=1, act=None)
                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx,
                                                pool_type='sum')
                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev1,
                    cell_t_prev=c_prev,
                    size=decoder_size)
                drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
                drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)
                drnn.output(scores)
            return drnn()

        fw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)
        bw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)

        def sequence_slice(x, index):
            # Pick the `index`-th step of every sequence in x as a
            # one-step LoDTensor.
            # BUG FIX: the original hard-coded value=1 here, ignoring
            # `index`; that made sequence_slice(x, 0) and
            # sequence_slice(x, 1) identical, collapsing start/end
            # probabilities onto the same slice.
            idx = layers.fill_constant(shape=[1], value=index, dtype='int32')
            idx.stop_gradient = True
            from paddle.fluid.layers.control_flow import lod_rank_table
            from paddle.fluid.layers.control_flow import lod_tensor_to_array
            from paddle.fluid.layers.control_flow import array_read
            from paddle.fluid.layers.control_flow import array_to_lod_tensor
            table = lod_rank_table(x, level=0)
            table.stop_gradient = True
            array = lod_tensor_to_array(x, table)
            slice_array = array_read(array=array, i=idx)
            return array_to_lod_tensor(slice_array, table)

        start_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 0),
            y=sequence_slice(bw_outputs, 1),
            axis=0) / 2
        end_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 1),
            y=sequence_slice(bw_outputs, 0),
            axis=0) / 2
        return start_prob, end_prob

    q_enc = encoder('q_ids')

    if args.single_doc:
        p_enc = encoder('p_ids')
        m, g = attn_flow(q_enc, p_enc, 'p_ids')
    else:
        # Multi-document mode: encode and attend each passage, then
        # concatenate along the sequence axis.
        p_ids_names = []
        ms = []
        gs = []
        for i in range(args.doc_num):
            p_ids_name = "pids_%d" % i
            p_ids_names.append(p_ids_name)
            p_enc = encoder(p_ids_name)
            m_i, g_i = attn_flow(q_enc, p_enc, p_ids_name)
            ms.append(m_i)
            gs.append(g_i)
        m = layers.sequence_concat(x=ms, axis=0)
        g = layers.sequence_concat(x=gs, axis=0)

    if args.simple_decode:
        # NOTE(review): act='softmax' on a size-1 fc output is degenerate
        # (always 1.0 per position) — probably act=None followed by
        # sequence_softmax was intended; confirm before relying on this
        # branch.
        m2 = bi_lstm_encoder(input_seq=m, gate_size=embedding_dim)
        gm1 = layers.concat(input=[g, m], axis=1)
        gm2 = layers.concat(input=[g, m2], axis=1)
        start_prob = layers.fc(input=gm1, size=1, act='softmax')
        end_prob = layers.fc(input=gm2, size=1, act='softmax')
    else:
        p_vec = layers.sequence_concat(x=m, axis=0)
        q_vec = bi_lstm_encoder(input_seq=q_enc, gate_size=embedding_dim)
        start_prob, end_prob = point_network_decoder(
            p_vec=p_vec, q_vec=q_vec, decoder_size=decoder_size)
        # Normalize the sliced decoder scores over each sequence.
        start_prob = layers.sequence_softmax(start_prob)
        end_prob = layers.sequence_softmax(end_prob)

    pred = layers.concat(input=[start_prob, end_prob], axis=0)

    # Data-layer names intentionally keep the historical "lables" spelling;
    # they must match the feeders/readers elsewhere in the project.
    start_labels = layers.data(
        name="start_lables", shape=[1], dtype='float32', lod_level=1)
    end_labels = layers.data(
        name="end_lables", shape=[1], dtype='float32', lod_level=1)
    label = layers.concat(input=[start_labels, end_labels], axis=0)
    label.stop_gradient = True

    # Compute the loss: soft-label cross entropy averaged over the batch.
    cost = layers.cross_entropy(input=pred, label=label, soft_label=True)
    cost = layers.reduce_sum(cost) / args.batch_size

    if args.debug:
        # BUG FIX: the original printed the undefined name `p1`, raising
        # NameError whenever debug mode was enabled.
        layers.Print(start_prob, message='start_prob')
        layers.Print(pred, message='pred')
        layers.Print(label, message='label')
        layers.Print(start_labels, message='start_labels')
        layers.Print(cost, message='cost')

    if args.single_doc:
        feeding_list = ['q_ids', "start_lables", "end_lables", 'p_ids']
    else:
        feeding_list = ['q_ids', "start_lables", "end_lables"] + p_ids_names
    return cost, feeding_list