Example #1
def attn_flow(q_enc, p_enc, p_ids_name, args):
    """Bidirectional Attention layer"""
    tag = p_ids_name + "__"
    drnn = layers.DynamicRNN()
    with drnn.block():
        # For each passage step, score its similarity against every question step.
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_mul = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t_sum = layers.reduce_sum(input=s_t_mul, dim=1, keep_dim=True)
        s_t_re = layers.reshape(s_t_sum, shape=[-1, 0])
        s_t = layers.sequence_softmax(input=s_t_re)
        # Context-to-question attention: weighted sum of question states.
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

        # Max similarity per passage step, used for question-to-context attention.
        b_t = layers.sequence_pool(input=s_t_sum, pool_type='max')
        drnn.output(u_expr, b_t)
    U_expr, b = drnn()
    # Question-to-context attention: softmax over passage steps, then weighted sum.
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    # Tile the pooled vector back to passage length.
    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=p_enc, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=p_enc, y=H_expr, axis=0)

    # Concatenate passage encoding, attended question, and the interaction terms.
    g = layers.concat(input=[p_enc, U_expr, h_u, h_h], axis=1)
    return dropout(g, args)
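
The attention weights above (s_t and b_norm) come from sequence_softmax, which normalizes scores within each sequence of a LoDTensor rather than across the whole batch. Below is a minimal, self-contained sketch of that behavior, assuming PaddlePaddle 1.x with the legacy fluid.layers API; the variable names are illustrative only.

import numpy as np
import paddle.fluid as fluid

# Scores arrive as a LoDTensor with one score per token (lod_level=1).
scores = fluid.layers.data(name='scores', shape=[1], dtype='float32', lod_level=1)
probs = fluid.layers.sequence_softmax(input=scores)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# Two sequences of lengths 3 and 2; each is normalized independently.
data = np.array([[1.0], [2.0], [3.0], [0.5], [1.5]], dtype='float32')
lod_tensor = fluid.create_lod_tensor(data, [[3, 2]], place)
out, = exe.run(feed={'scores': lod_tensor},
               fetch_list=[probs],
               return_numpy=False)
print(np.array(out))  # rows 0-2 sum to 1.0, rows 3-4 sum to 1.0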
Example #2
        def custom_dynamic_rnn(p_vec, init_state, decoder_size):
            context = layers.fc(input=p_vec,
                                size=decoder_size,
                                act=None)

            drnn = layers.DynamicRNN()
            with drnn.block():
                H_s = drnn.step_input(p_vec)
                ctx = drnn.static_input(context)

                c_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
                m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
                logits = layers.fc(input=Fk, size=1, act=None)

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev1,
                    cell_t_prev=c_prev,
                    size=decoder_size)

                drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
                drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

                drnn.output(scores)
            beta = drnn()
            return beta
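
For reference, DynamicRNN (used above) iterates over a LoD sequence one time step at a time: step_input yields the current step, static_input exposes a whole sequence to every step, and memory/update_memory carry state between steps. The toy below is a sketch of those three pieces under the same PaddlePaddle 1.x assumption; it only builds the program and is not part of the example above.

import paddle.fluid as fluid
layers = fluid.layers

x = layers.data(name='x', shape=[4], dtype='float32', lod_level=1)
drnn = layers.DynamicRNN()
with drnn.block():
    step = drnn.step_input(x)                      # current time step
    prev = drnn.memory(shape=[4], value=0.0)       # running state, zero-initialized
    cur = layers.elementwise_add(x=step, y=prev)   # accumulate
    drnn.update_memory(ex_mem=prev, new_mem=cur)
    drnn.output(cur)
running_sum = drnn()                               # LoDTensor of per-step outputs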
Example #3
    def test_sequence_softmax(self):
        program = Program()
        with program_guard(program):
            seq_data = layers.data(
                name='seq_data', shape=[10, 10], dtype='float32', lod_level=1)
            seq = layers.fc(input=seq_data, size=20)
            self.assertIsNotNone(layers.sequence_softmax(seq))
        print(str(program))
Example #4
        def static_rnn(step,
                       p_vec=p_vec,
                       init_state=None,
                       para_name='',
                       args=args):
            tag = para_name + "static_rnn_"
            ctx = layers.fc(
                input=p_vec,
                param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
                bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
                size=hidden_size,
                act=None)

            beta = []
            c_prev = init_state
            m_prev = init_state
            for i in range(step):
                m_prev0 = layers.fc(
                    input=m_prev,
                    size=hidden_size,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
                m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.tanh(Fk)
                logits = layers.fc(
                    input=Fk,
                    size=1,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev,
                    cell_t_prev=c_prev,
                    size=hidden_size,
                    para_name=tag,
                    args=args)
                m_prev = hidden_t
                c_prev = cell_t
                beta.append(scores)
            return beta
Example #5
    def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                         decoder_size):
        decoder_state_proj = layers.fc(input=decoder_state,
                                       size=decoder_size,
                                       bias_attr=False,
                                       name="decoder_state_proj_fc")
        decoder_state_expand = layers.sequence_expand(
            x=decoder_state_proj, y=encoder_proj)
        concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
        concated = layers.tanh(x=concated)
        attention_weights = layers.fc(input=concated,
                                      size=1,
                                      act=None,
                                      bias_attr=False,
                                      name="attention_weights_fc")
        attention_weights = layers.sequence_softmax(input=attention_weights)
        weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
        scaled = layers.elementwise_mul(
            x=encoder_vec, y=weights_reshape, axis=0)
        context = layers.sequence_pool(input=scaled, pool_type='sum')
        return context
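
As a plain NumPy reference (a hypothetical helper, not part of the example above), the computation in simple_attention for a single encoder sequence is additive attention followed by a softmax-weighted sum; the learned size-1 attention_weights_fc is stood in for by a fixed vector v here.

import numpy as np

def simple_attention_reference(encoder_vec, encoder_proj, decoder_state_proj, v):
    # encoder_vec:        [T, H]  encoder outputs for one sequence
    # encoder_proj:       [T, D]  projected encoder outputs
    # decoder_state_proj: [D]     projected decoder state
    # v:                  [D]     stands in for the size-1 attention_weights_fc
    e = np.tanh(encoder_proj + decoder_state_proj)        # [T, D]
    logits = e.dot(v)                                     # [T]
    weights = np.exp(logits - logits.max())
    weights /= weights.sum()                              # per-sequence softmax
    return (encoder_vec * weights[:, None]).sum(axis=0)   # [H] context vector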
Example #6
def point_network_decoder(p_vec, q_vec, hidden_size, args):
    """Output layer - pointer network"""
    tag = 'pn_decoder_'
    init_random = fluid.initializer.Normal(loc=0.0, scale=1.0)

    random_attn = layers.create_parameter(
        shape=[1, hidden_size],
        dtype='float32',
        default_initializer=init_random)
    random_attn = layers.fc(
        input=random_attn,
        size=hidden_size,
        act=None,
        param_attr=fluid.ParamAttr(name=tag + 'random_attn_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'random_attn_fc_b'))
    random_attn = layers.reshape(random_attn, shape=[-1])
    U = layers.fc(input=q_vec,
                  param_attr=fluid.ParamAttr(name=tag + 'q_vec_fc_w'),
                  bias_attr=False,
                  size=hidden_size,
                  act=None) + random_attn
    U = layers.tanh(U)

    logits = layers.fc(input=U,
                       param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                       bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'),
                       size=1,
                       act=None)
    scores = layers.sequence_softmax(input=logits)
    pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
    pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')

    init_state = layers.fc(
        input=pooled_vec,
        param_attr=fluid.ParamAttr(name=tag + 'init_state_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'init_state_fc_b'),
        size=hidden_size,
        act=None)

    def custom_dynamic_rnn(p_vec, init_state, hidden_size, para_name, args):
        tag = para_name + "custom_dynamic_rnn_"

        def static_rnn(step,
                       p_vec=p_vec,
                       init_state=None,
                       para_name='',
                       args=args):
            tag = para_name + "static_rnn_"
            ctx = layers.fc(
                input=p_vec,
                param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
                bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
                size=hidden_size,
                act=None)

            beta = []
            c_prev = init_state
            m_prev = init_state
            for i in range(step):
                m_prev0 = layers.fc(
                    input=m_prev,
                    size=hidden_size,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
                m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.tanh(Fk)
                logits = layers.fc(
                    input=Fk,
                    size=1,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev,
                    cell_t_prev=c_prev,
                    size=hidden_size,
                    para_name=tag,
                    args=args)
                m_prev = hidden_t
                c_prev = cell_t
                beta.append(scores)
            return beta

        return static_rnn(
            2, p_vec=p_vec, init_state=init_state, para_name=para_name)

    fw_outputs = custom_dynamic_rnn(p_vec, init_state, hidden_size, tag + "fw_",
                                    args)
    bw_outputs = custom_dynamic_rnn(p_vec, init_state, hidden_size, tag + "bw_",
                                    args)

    start_prob = layers.elementwise_add(
        x=fw_outputs[0], y=bw_outputs[1], axis=0) / 2
    end_prob = layers.elementwise_add(
        x=fw_outputs[1], y=bw_outputs[0], axis=0) / 2

    return start_prob, end_prob
Example #7
    def attn_flow(q_enc, p_enc, p_ids_name):
        tag = p_ids_name + "::"
        drnn = layers.DynamicRNN()
        with drnn.block():
            h_cur = drnn.step_input(p_enc)
            u_all = drnn.static_input(q_enc)
            h_expd = layers.sequence_expand(x=h_cur, y=u_all)
            s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
            s_t1 = layers.reduce_sum(input=s_t_, dim=1)
            s_t = layers.sequence_softmax(input=s_t1)
            u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
            u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

            if args.debug == True:
                '''
                layers.Print(h_expd, message='h_expd')
                layers.Print(h_cur, message='h_cur')
                layers.Print(u_all, message='u_all')
                layers.Print(s_t, message='s_t')
                layers.Print(s_t_, message='s_t_')
                layers.Print(u_expr, message='u_expr')
                '''
            drnn.output(u_expr)

        U_expr = drnn()

        drnn2 = layers.DynamicRNN()
        with drnn2.block():
            h_cur = drnn2.step_input(p_enc)
            u_all = drnn2.static_input(q_enc)
            h_expd = layers.sequence_expand(x=h_cur, y=u_all)
            s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
            s_t2 = layers.reduce_sum(input=s_t_, dim=1, keep_dim=True)
            b_t = layers.sequence_pool(input=s_t2, pool_type='max')

            if args.debug == True:
                '''
                layers.Print(s_t2, message='s_t2')
                layers.Print(b_t, message='b_t')
                '''
            drnn2.output(b_t)
        b = drnn2()
        b_norm = layers.sequence_softmax(input=b)
        h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
        h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

        H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
        H_expr = layers.lod_reset(x=H_expr, y=p_enc)
        h_u = layers.elementwise_mul(x=H_expr, y=U_expr, axis=0)
        h_h = layers.elementwise_mul(x=H_expr, y=p_enc, axis=0)

        g = layers.concat(input=[H_expr, U_expr, h_u, h_h], axis=1)

        # fusion
        m = bi_lstm_encoder(input_seq=g, gate_size=embedding_dim)
        if args.debug == True:
            layers.Print(U_expr, message=tag + 'U_expr')
            layers.Print(H_expr, message=tag + 'H_expr')
            layers.Print(b, message=tag + 'b')
            layers.Print(b_norm, message=tag + 'b_norm')
            layers.Print(g, message=tag + 'g')
            layers.Print(m, message=tag + 'm')
            layers.Print(h_h, message=tag + 'h_h')
            layers.Print(q_enc, message=tag + 'q_enc')
            layers.Print(p_enc, message=tag + 'p_enc')

        return m, g
Example #8
def bidaf(embedding_dim, encoder_size, decoder_size, source_dict_dim,
                   target_dict_dim,  max_length, args):
    def bi_lstm_encoder(input_seq, gate_size):
        # A bi-directional lstm encoder implementation.
        # Linear transformation part for input gate, output gate, forget gate
        # and cell activation vectors need be done outside of dynamic_lstm.
        # So the output size is 4 times of gate_size.
        input_forward_proj = layers.fc(input=input_seq,
                                             size=gate_size * 4,
                                             act='tanh',
                                             bias_attr=False)
        forward, _ = layers.dynamic_lstm(
            input=input_forward_proj, size=gate_size * 4, use_peepholes=False)
        input_reversed_proj = layers.fc(input=input_seq,
                                              size=gate_size * 4,
                                              act='tanh',
                                              bias_attr=False)
        reversed, _ = layers.dynamic_lstm(
            input=input_reversed_proj,
            size=gate_size * 4,
            is_reverse=True,
            use_peepholes=False)
        encoder_out = layers.concat(input=[forward, reversed], axis = 1)
        return encoder_out

    def encoder(input_name):
        input_ids = layers.data(
            name=input_name, shape=[1], dtype='int64', lod_level=1)
        input_embedding = layers.embedding(
            input=input_ids,
            size=[source_dict_dim, embedding_dim],
            dtype='float32',
            is_sparse=True)
        encoder_out = bi_lstm_encoder(input_seq=input_embedding, gate_size=embedding_dim)
        return encoder_out

    def attn_flow(q_enc, p_enc, p_ids_name):
        tag = p_ids_name + "::"
        drnn = layers.DynamicRNN()
        with drnn.block():
            h_cur = drnn.step_input(p_enc)
            u_all = drnn.static_input(q_enc)
            h_expd = layers.sequence_expand(x=h_cur, y=u_all)
            s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
            s_t1 = layers.reduce_sum(input=s_t_, dim=1)
            s_t = layers.sequence_softmax(input=s_t1)
            u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
            u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

            if args.debug == True:
                '''
                layers.Print(h_expd, message='h_expd')
                layers.Print(h_cur, message='h_cur')
                layers.Print(u_all, message='u_all')
                layers.Print(s_t, message='s_t')
                layers.Print(s_t_, message='s_t_')
                layers.Print(u_expr, message='u_expr')
                '''
            drnn.output(u_expr)

        U_expr = drnn()

        drnn2 = layers.DynamicRNN()
        with drnn2.block():
            h_cur = drnn2.step_input(p_enc)
            u_all = drnn2.static_input(q_enc)
            h_expd = layers.sequence_expand(x=h_cur, y=u_all)
            s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
            s_t2 = layers.reduce_sum(input=s_t_, dim=1, keep_dim=True)
            b_t = layers.sequence_pool(input=s_t2, pool_type='max')

            if args.debug == True:
                '''
                layers.Print(s_t2, message='s_t2')
                layers.Print(b_t, message='b_t')
                '''
            drnn2.output(b_t)
        b = drnn2()
        b_norm = layers.sequence_softmax(input=b)
        h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
        h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

        H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
        H_expr = layers.lod_reset(x=H_expr, y=p_enc)
        h_u = layers.elementwise_mul(x=H_expr, y=U_expr, axis=0)
        h_h = layers.elementwise_mul(x=H_expr, y=p_enc, axis=0)

        g = layers.concat(input=[H_expr, U_expr, h_u, h_h], axis=1)

        # fusion
        m = bi_lstm_encoder(input_seq=g, gate_size=embedding_dim)
        if args.debug == True:
            layers.Print(U_expr, message=tag + 'U_expr')
            layers.Print(H_expr, message=tag + 'H_expr')
            layers.Print(b, message=tag + 'b')
            layers.Print(b_norm, message=tag + 'b_norm')
            layers.Print(g, message=tag + 'g')
            layers.Print(m, message=tag + 'm')
            layers.Print(h_h, message=tag + 'h_h')
            layers.Print(q_enc, message=tag + 'q_enc')
            layers.Print(p_enc, message=tag + 'p_enc')

        return m, g
    
    def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
        def linear(inputs):
            return layers.fc(input=inputs, size=size, bias_attr=True)

        forget_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
        input_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
        output_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
        cell_tilde = layers.tanh(x=linear([hidden_t_prev, x_t]))

        cell_t = layers.sums(input=[
            layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
            layers.elementwise_mul(x=input_gate, y=cell_tilde)
        ])

        hidden_t = layers.elementwise_mul(
            x=output_gate, y=layers.tanh(x=cell_t))

        return hidden_t, cell_t
    
    # point network
    def point_network_decoder(p_vec, q_vec, decoder_size):
        random_attn = layers.gaussian_random(shape=[1, decoder_size])
        random_attn = layers.sequence_expand(x=random_attn, y=q_vec)
        random_attn = layers.fc(input=random_attn, size=decoder_size, act=None)
        U = layers.fc(input=q_vec,
                      size=decoder_size,
                      act=None) + random_attn
        U = layers.tanh(U)

        logits = layers.fc(input=U,
                           size=1,
                           act=None)
        scores = layers.sequence_softmax(input=logits)
        pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
        pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')

        init_state = layers.fc(input=pooled_vec,
                               size=decoder_size,
                               act=None)

        def custom_dynamic_rnn(p_vec, init_state, decoder_size):
            context = layers.fc(input=p_vec,
                                size=decoder_size,
                                act=None)

            drnn = layers.DynamicRNN()
            with drnn.block():
                H_s = drnn.step_input(p_vec)
                ctx = drnn.static_input(context)

                c_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
                m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
                logits = layers.fc(input=Fk, size=1, act=None)

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev1,
                    cell_t_prev=c_prev,
                    size=decoder_size)

                drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
                drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

                drnn.output(scores)
            beta = drnn()
            return beta

        fw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)
        bw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)

        def sequence_slice(x, index):
            #offset = layers.fill_constant(shape=[1, args.batch_size], value=index, dtype='float32')
            #length = layers.fill_constant(shape=[1, args.batch_size], value=1, dtype='float32')
            #return layers.sequence_slice(x, offset, length)
            idx = layers.fill_constant(shape=[1], value=index, dtype='int32')
            idx.stop_gradient = True
            from paddle.fluid.layers.control_flow import lod_rank_table
            from paddle.fluid.layers.control_flow import lod_tensor_to_array
            from paddle.fluid.layers.control_flow import array_read
            from paddle.fluid.layers.control_flow import array_to_lod_tensor
            table = lod_rank_table(x, level=0)
            table.stop_gradient = True
            array = lod_tensor_to_array(x, table)
            slice_array = array_read(array=array, i=idx)
            return array_to_lod_tensor(slice_array, table)

        start_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 0), y=sequence_slice(bw_outputs, 1),
            axis=0) / 2
        end_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 1), y=sequence_slice(bw_outputs, 0),
            axis=0) / 2
        return start_prob, end_prob
 
 
    q_enc = encoder('q_ids')

    if args.single_doc:
        p_enc = encoder('p_ids')
        m, g = attn_flow(q_enc, p_enc, 'p_ids')
        
    else:
        p_ids_names = []
        ms = []
        gs = []
        for i in range(args.doc_num):
            p_ids_name = "pids_%d" % i
            p_ids_names.append(p_ids_name)
            p_enc = encoder(p_ids_name)

            m_i, g_i = attn_flow(q_enc, p_enc, p_ids_name)
            ms.append(m_i)
            gs.append(g_i)
            m = layers.sequence_concat(x=ms, axis=0)
            g = layers.sequence_concat(x=gs, axis=0)
            
    if args.simple_decode:
        m2 = bi_lstm_encoder(input_seq=m, gate_size=embedding_dim)
        
        gm1 = layers.concat(input=[g, m], axis = 1) 
        gm2 = layers.concat(input=[g, m2], axis = 1) 
        start_prob = layers.fc(input=gm1, size=1, act='softmax')
        end_prob = layers.fc(input=gm2, size=1, act='softmax')
    else:

        p_vec = layers.sequence_concat(x=m, axis=0)
        q_vec = bi_lstm_encoder(input_seq=q_enc, gate_size=embedding_dim)
        start_prob, end_prob = point_network_decoder(
            p_vec=p_vec, q_vec=q_vec, decoder_size=decoder_size)

    start_prob = layers.sequence_softmax(start_prob)
    end_prob = layers.sequence_softmax(end_prob)

    pred = layers.concat(input=[start_prob, end_prob], axis=0)

    start_labels = layers.data(
        name="start_lables", shape=[1], dtype='float32', lod_level=1)

    end_labels = layers.data(
        name="end_lables", shape=[1], dtype='float32', lod_level=1)
    
    label = layers.concat(input=[start_labels, end_labels], axis=0)
    label.stop_gradient = True

    #compute loss
    cost = layers.cross_entropy(input=pred, label=label, soft_label=True)
    #cost = layers.cross_entropy(input=decode_out, label=end_labels, soft_label=True)
    cost = layers.reduce_sum(cost) / args.batch_size
     
    if args.debug == True:
        layers.Print(start_prob, message='start_prob')
        layers.Print(pred, message='pred')
        layers.Print(label, message='label')
        layers.Print(start_labels, message='start_labels')
        layers.Print(cost, message='cost')
    
    if args.single_doc:
        feeding_list = ['q_ids',  "start_lables", "end_lables", 'p_ids']
    else:
        feeding_list = ['q_ids',  "start_lables", "end_lables" ] + p_ids_names
    return cost, feeding_list
Example #9
    def point_network_decoder(p_vec, q_vec, decoder_size):
        random_attn = layers.gaussian_random(shape=[1, decoder_size])
        random_attn = layers.sequence_expand(x=random_attn, y=q_vec)
        random_attn = layers.fc(input=random_attn, size=decoder_size, act=None)
        U = layers.fc(input=q_vec,
                      size=decoder_size,
                      act=None) + random_attn
        U = layers.tanh(U)

        logits = layers.fc(input=U,
                           size=1,
                           act=None)
        scores = layers.sequence_softmax(input=logits)
        pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
        pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')

        init_state = layers.fc(input=pooled_vec,
                               size=decoder_size,
                               act=None)

        def custom_dynamic_rnn(p_vec, init_state, decoder_size):
            context = layers.fc(input=p_vec,
                                size=decoder_size,
                                act=None)

            drnn = layers.DynamicRNN()
            with drnn.block():
                H_s = drnn.step_input(p_vec)
                ctx = drnn.static_input(context)

                c_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
                m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
                logits = layers.fc(input=Fk, size=1, act=None)

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev1,
                    cell_t_prev=c_prev,
                    size=decoder_size)

                drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
                drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

                drnn.output(scores)
            beta = drnn()
            return beta

        fw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)
        bw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)

        def sequence_slice(x, index):
            #offset = layers.fill_constant(shape=[1, args.batch_size], value=index, dtype='float32')
            #length = layers.fill_constant(shape=[1, args.batch_size], value=1, dtype='float32')
            #return layers.sequence_slice(x, offset, length)
            idx = layers.fill_constant(shape=[1], value=index, dtype='int32')
            idx.stop_gradient = True
            from paddle.fluid.layers.control_flow import lod_rank_table
            from paddle.fluid.layers.control_flow import lod_tensor_to_array
            from paddle.fluid.layers.control_flow import array_read
            from paddle.fluid.layers.control_flow import array_to_lod_tensor
            table = lod_rank_table(x, level=0)
            table.stop_gradient = True
            array = lod_tensor_to_array(x, table)
            slice_array = array_read(array=array, i=idx)
            return array_to_lod_tensor(slice_array, table)

        start_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 0), y=sequence_slice(bw_outputs, 1),
            axis=0) / 2
        end_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 1), y=sequence_slice(bw_outputs, 0),
            axis=0) / 2
        return start_prob, end_prob