Example #1
def attn_flow(q_enc, p_enc, p_ids_name, args):
    """Bidirectional Attention layer"""
    tag = p_ids_name + "__"
    drnn = layers.DynamicRNN()
    with drnn.block():
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_mul = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t_sum = layers.reduce_sum(input=s_t_mul, dim=1, keep_dim=True)
        s_t_re = layers.reshape(s_t_sum, shape=[-1, 0])
        s_t = layers.sequence_softmax(input=s_t_re)
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

        b_t = layers.sequence_pool(input=s_t_sum, pool_type='max')
        drnn.output(u_expr, b_t)
    U_expr, b = drnn()
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=p_enc, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=p_enc, y=H_expr, axis=0)

    g = layers.concat(input=[p_enc, U_expr, h_u, h_h], axis=1)
    return dropout(g, args)
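# An illustrative NumPy view (not part of the snippet above) of the
# context-to-query attention (U_expr) and query-to-context attention (H_expr)
# that the DynamicRNN computes one paragraph step at a time.
import numpy as np

def softmax(x, axis):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def bidaf_attention(p_enc, q_enc):
    # p_enc: [T, d] paragraph encodings; q_enc: [J, d] question encodings
    sim = p_enc @ q_enc.T                      # s_tj = <h_t, u_j>, the mul + reduce_sum above
    u_expr = softmax(sim, axis=1) @ q_enc      # attended question vector per paragraph step
    b = softmax(sim.max(axis=1), axis=0)       # query-to-context weights over paragraph steps
    h_expr = (b[:, None] * p_enc).sum(axis=0)  # pooled paragraph vector
    return u_expr, np.tile(h_expr, (p_enc.shape[0], 1))

Example #2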
def decoder_train(context, is_sparse):
    # decoder
    trg_language_word = pd.data(name="target_language_word",
                                shape=[1],
                                dtype='int64',
                                lod_level=1)
    trg_embedding = pd.embedding(input=trg_language_word,
                                 size=[dict_size, word_dim],
                                 dtype='float32',
                                 is_sparse=is_sparse,
                                 param_attr=fluid.ParamAttr(name='vemb'))

    rnn = pd.DynamicRNN()
    with rnn.block():
        current_word = rnn.step_input(trg_embedding)
        pre_state = rnn.memory(init=context)
        current_state = pd.fc(input=[current_word, pre_state],
                              size=decoder_size,
                              act='tanh')

        current_score = pd.fc(input=current_state,
                              size=target_dict_dim,
                              act='softmax')
        rnn.update_memory(pre_state, current_state)
        rnn.output(current_score)

    return rnn()
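# A minimal sketch of how the decoder output is typically wired into a
# cross-entropy training loss; the label layer and variable names below are
# assumptions for illustration, and `pd` is assumed to alias
# paddle.fluid.layers as in the snippet above.
import paddle.fluid.layers as pd

# `context` is the encoder state produced elsewhere in the model
rnn_out = decoder_train(context, is_sparse=True)
label = pd.data(name="target_language_next_word",
                shape=[1], dtype='int64', lod_level=1)
cost = pd.cross_entropy(input=rnn_out, label=label)
avg_cost = pd.mean(cost)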
Example #3
def gru_decoder_with_attention(self, target_embedding, encoder_vec,
                               encoder_proj, decoder_boot, decoder_size,
                               char_num):
    rnn = layers.DynamicRNN()
    with rnn.block():
        current_word = rnn.step_input(target_embedding)
        encoder_vec = rnn.static_input(encoder_vec)
        encoder_proj = rnn.static_input(encoder_proj)
        hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
        context = self.simple_attention(encoder_vec, encoder_proj,
                                        hidden_mem, decoder_size)
        fc_1 = layers.fc(input=context,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc1")
        fc_2 = layers.fc(input=current_word,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc2")
        decoder_inputs = fc_1 + fc_2
        h, _, _ = layers.gru_unit(
            input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
        rnn.update_memory(hidden_mem, h)
        out = layers.fc(input=h,
                        size=char_num,
                        bias_attr=True,
                        act='softmax',
                        name="rnn_out_fc")
        rnn.output(out)
    return rnn()
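# Both fc projections above use size=decoder_size * 3 because gru_unit expects
# the input projections for its three gates concatenated. A NumPy sketch of one
# step of a common GRU formulation (illustration only; the exact gate ordering
# and update convention inside layers.gru_unit may differ):
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x3, h_prev, U_u, U_r, U_c):
    # x3: concatenated input projections, size 3 * d; h_prev: hidden state, size d
    d = h_prev.shape[-1]
    x_u, x_r, x_c = x3[:d], x3[d:2 * d], x3[2 * d:]
    u = sigmoid(x_u + U_u @ h_prev)        # update gate
    r = sigmoid(x_r + U_r @ h_prev)        # reset gate
    c = np.tanh(x_c + U_c @ (r * h_prev))  # candidate state
    return u * h_prev + (1.0 - u) * c      # new hidden state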
Example #4
def rc_model(hidden_size, vocab, args):
    emb_shape = [vocab.size(), vocab.embed_dim]
    start_labels = layers.data(name="start_lables",
                               shape=[1],
                               dtype='float32',
                               lod_level=1)
    end_labels = layers.data(name="end_lables",
                             shape=[1],
                             dtype='float32',
                             lod_level=1)

    # stage 1:encode
    q_id0 = get_data('q_id0', 1, args)

    q_ids = get_data('q_ids', 2, args)
    p_ids_name = 'p_ids'

    p_ids = get_data('p_ids', 2, args)
    p_embs = embedding(p_ids, emb_shape, args)
    q_embs = embedding(q_ids, emb_shape, args)
    drnn = layers.DynamicRNN()
    with drnn.block():
        p_emb = drnn.step_input(p_embs)
        q_emb = drnn.step_input(q_embs)

        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)
        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)

        # stage 2:match
        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)
        # stage 3:fusion
        m_i = fusion(g_i, args)
        drnn.output(m_i, q_enc)

    ms, q_encs = drnn()
    p_vec = layers.lod_reset(x=ms, y=start_labels)
    q_vec = layers.lod_reset(x=q_encs, y=q_id0)

    # stage 4:decode
    start_probs, end_probs = point_network_decoder(p_vec=p_vec,
                                                   q_vec=q_vec,
                                                   hidden_size=hidden_size,
                                                   args=args)

    cost0 = layers.sequence_pool(
        layers.cross_entropy(input=start_probs,
                             label=start_labels,
                             soft_label=True), 'sum')
    cost1 = layers.sequence_pool(
        layers.cross_entropy(input=end_probs,
                             label=end_labels,
                             soft_label=True), 'sum')

    cost0 = layers.mean(cost0)
    cost1 = layers.mean(cost1)
    cost = cost0 + cost1
    cost.persistable = True

    feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
    return cost, start_probs, end_probs, ms, feeding_list
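# A hedged sketch of how the returned cost is usually handed to an optimizer
# by the surrounding training script; the Adam choice and the learning-rate
# attribute on `args` are assumptions, not part of the snippet.
import paddle.fluid as fluid

cost, start_probs, end_probs, ms, feeding_list = rc_model(hidden_size, vocab, args)
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer.minimize(cost)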
Example #5
def custom_dynamic_rnn(p_vec, init_state, decoder_size):
    context = layers.fc(input=p_vec,
                        size=decoder_size,
                        act=None)

    drnn = layers.DynamicRNN()
    with drnn.block():
        H_s = drnn.step_input(p_vec)
        ctx = drnn.static_input(context)

        c_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev = drnn.memory(init=init_state, need_reorder=True)
        m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
        m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

        Fk = ctx + m_prev1
        Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
        logits = layers.fc(input=Fk, size=1, act=None)

        scores = layers.sequence_softmax(input=logits)
        attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
        attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
        hidden_t, cell_t = lstm_step(attn_ctx,
                                     hidden_t_prev=m_prev1,
                                     cell_t_prev=c_prev,
                                     size=decoder_size)

        drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
        drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

        drnn.output(scores)
    beta = drnn()
    return beta
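# The step above is an additive, pointer-network style attention over the
# projected passage vectors, followed by an LSTM state update. A NumPy sketch
# of just the scoring part; W and v are illustrative stand-ins for the two fc
# layers (size=decoder_size and size=1 respectively).
import numpy as np

def pointer_scores(ctx, m_prev_proj, W, v):
    # ctx: [T, d] projected passage vectors; m_prev_proj: [d] expanded decoder state
    fk = np.tanh((ctx + m_prev_proj) @ W)  # layers.fc(Fk, size=decoder_size, act='tanh')
    logits = fk @ v                        # layers.fc(Fk, size=1, act=None)
    e = np.exp(logits - logits.max())
    return e / e.sum()                     # sequence_softmax over passage positions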
Example #6
def dynamic_rnn_net(self):
    x = layers.data(shape=[BATCH_SIZE * SEQ_LEN, INPUT_DIM],
                    dtype="float32",
                    name="x",
                    append_batch_size=False)
    x.stop_gradient = False
    rnn = layers.DynamicRNN()
    with rnn.block():
        x_t = rnn.step_input(x)
        h_pre = rnn.memory(shape=[INPUT_DIM])
        h = layers.scale(x=layers.elementwise_add(x=h_pre, y=x_t),
                         scale=self.scale)
        rnn.update_memory(h_pre, h)
        rnn.output(h)
    return layers.mean(rnn())
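# The recurrence unrolled above is simply h_t = scale * (h_{t-1} + x_t), with a
# zero-initialised memory (rnn.memory(shape=[INPUT_DIM]) defaults to zeros).
# A NumPy reference for a single sequence, for illustration only:
import numpy as np

def reference_rnn(x, scale):
    # x: [seq_len, INPUT_DIM]; returns the stacked per-step outputs
    h = np.zeros(x.shape[1], dtype=np.float32)
    outs = []
    for x_t in x:
        h = scale * (h + x_t)  # same update as the DynamicRNN block
        outs.append(h)
    return np.stack(outs)

Example #7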
def rc_model(hidden_size, vocab, args):
    """This function build the whole BiDAF network"""
    emb_shape = [vocab.size(), vocab.embed_dim]
    start_labels = layers.data(name="start_lables",
                               shape=[1],
                               dtype='float32',
                               lod_level=1)
    end_labels = layers.data(name="end_lables",
                             shape=[1],
                             dtype='float32',
                             lod_level=1)

    # stage 1:setup input data, embedding table & encode
    """
    def get_data(input_name, lod_level, args):
        input_ids = layers.data(
            name=input_name, shape=[1], dtype='int64', lod_level=lod_level)
        return input_ids
    """
    q_id0 = get_data('q_id0', 1, args)
    q_ids = get_data('q_ids', 2, args)
    p_ids_name = 'p_ids'
    p_ids = get_data('p_ids', 2, args)
    """
    def embedding(input_ids, shape, args):  # Embedding layer
        input_embedding = layers.embedding(
            input=input_ids,
            size=shape,
            dtype='float32',
            is_sparse=True,
            param_attr=fluid.ParamAttr(name='embedding_para'))
        return input_embedding
    """
    p_embs = embedding(p_ids, emb_shape,
                       args)  # emb_shape = [vocab.size(), vocab.embed_dim]
    q_embs = embedding(q_ids, emb_shape, args)
    drnn = layers.DynamicRNN()
    with drnn.block():
        p_emb = drnn.step_input(p_embs)  # step_input() marks the sequence as a step input of the DynamicRNN
        q_emb = drnn.step_input(q_embs)

        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)  # BiLSTM
        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)  # BiLSTM

        # stage 2:match
        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)
        # stage 3:fusion
        m_i = fusion(g_i, args)
        drnn.output(m_i, q_enc)

    ms, q_encs = drnn()
    p_vec = layers.lod_reset(x=ms, y=start_labels)
    q_vec = layers.lod_reset(x=q_encs, y=q_id0)

    # stage 4:decode
    start_probs, end_probs = point_network_decoder(p_vec=p_vec,
                                                   q_vec=q_vec,
                                                   hidden_size=hidden_size,
                                                   args=args)

    # calculate model loss
    cost0 = layers.sequence_pool(
        layers.cross_entropy(input=start_probs,
                             label=start_labels,
                             soft_label=True), 'sum')
    cost1 = layers.sequence_pool(
        layers.cross_entropy(input=end_probs,
                             label=end_labels,
                             soft_label=True), 'sum')

    cost0 = layers.mean(cost0)
    cost1 = layers.mean(cost1)
    cost = cost0 + cost1
    cost.persistable = True

    feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
    return cost, start_probs, end_probs, ms, feeding_list
Example #8
def attn_flow(q_enc, p_enc, p_ids_name):
    tag = p_ids_name + "::"
    drnn = layers.DynamicRNN()
    with drnn.block():
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t1 = layers.reduce_sum(input=s_t_, dim=1)
        s_t = layers.sequence_softmax(input=s_t1)
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

        if args.debug == True:
            '''
            layers.Print(h_expd, message='h_expd')
            layers.Print(h_cur, message='h_cur')
            layers.Print(u_all, message='u_all')
            layers.Print(s_t, message='s_t')
            layers.Print(s_t_, message='s_t_')
            layers.Print(u_expr, message='u_expr')
            '''
        drnn.output(u_expr)

    U_expr = drnn()
    drnn2 = layers.DynamicRNN()
    with drnn2.block():
        h_cur = drnn2.step_input(p_enc)
        u_all = drnn2.static_input(q_enc)
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_ = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t2 = layers.reduce_sum(input=s_t_, dim=1, keep_dim=True)
        b_t = layers.sequence_pool(input=s_t2, pool_type='max')

        if args.debug == True:
            '''
            layers.Print(s_t2, message='s_t2')
            layers.Print(b_t, message='b_t')
            '''
        drnn2.output(b_t)
    b = drnn2()
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=H_expr, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=H_expr, y=p_enc, axis=0)

    g = layers.concat(input=[H_expr, U_expr, h_u, h_h], axis=1)

    # fusion
    m = bi_lstm_encoder(input_seq=g, gate_size=embedding_dim)
    if args.debug == True:
        layers.Print(U_expr, message=tag + 'U_expr')
        layers.Print(H_expr, message=tag + 'H_expr')
        layers.Print(b, message=tag + 'b')
        layers.Print(b_norm, message=tag + 'b_norm')
        layers.Print(g, message=tag + 'g')
        layers.Print(m, message=tag + 'm')
        layers.Print(h_h, message=tag + 'h_h')
        layers.Print(q_enc, message=tag + 'q_enc')
        layers.Print(p_enc, message=tag + 'p_enc')

    return m, g
Example #9
def rc_model(hidden_size, vocab, args):
    """This function build the whole BiDAF network"""
    emb_shape = [vocab.size(), vocab.embed_dim]
    start_labels = layers.data(name="start_lables",
                               shape=[1],
                               dtype='float32',
                               lod_level=1)
    end_labels = layers.data(name="end_lables",
                             shape=[1],
                             dtype='float32',
                             lod_level=1)

    # stage 1:setup input data, embedding table & encode
    q_id0 = get_data('q_id0', 1, args)

    q_ids = get_data('q_ids', 2, args)
    p_ids_name = 'p_ids'

    p_ids = get_data('p_ids', 2, args)
    # no character-level embedding is used here
    # word-level embedding
    p_embs = embedding(p_ids, emb_shape, args)
    q_embs = embedding(q_ids, emb_shape, args)
    drnn = layers.DynamicRNN()
    with drnn.block():
        p_emb = drnn.step_input(p_embs)
        q_emb = drnn.step_input(q_embs)

        # sentence-level encoding
        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)  # paragraph
        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)  # query

        # Attention flow layer is responsible for linking and
        # fusing information from the context and the query words.
        # stage 2:match
        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)

        # stage 3:fusion
        m_i = fusion(g_i, args)
        drnn.output(m_i, q_enc)

    ms, q_encs = drnn()
    p_vec = layers.lod_reset(x=ms, y=start_labels)
    q_vec = layers.lod_reset(x=q_encs, y=q_id0)

    # stage 4:decode
    start_probs, end_probs = point_network_decoder(p_vec=p_vec,
                                                   q_vec=q_vec,
                                                   hidden_size=hidden_size,
                                                   args=args)

    # calculate model loss
    cost0 = layers.sequence_pool(
        layers.cross_entropy(input=start_probs,
                             label=start_labels,
                             soft_label=True), 'sum')
    cost1 = layers.sequence_pool(
        layers.cross_entropy(input=end_probs,
                             label=end_labels,
                             soft_label=True), 'sum')

    cost0 = layers.mean(cost0)
    cost1 = layers.mean(cost1)
    cost = cost0 + cost1
    cost.persistable = True

    feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
    return cost, start_probs, end_probs, ms, feeding_list
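# A hedged sketch of how the returned feeding_list might be consumed by the
# caller to build a DataFeeder; the place and program objects here are
# assumptions about the surrounding training loop.
import paddle.fluid as fluid

place = fluid.CPUPlace()
main_program = fluid.default_main_program()
feed_vars = [main_program.global_block().var(name) for name in feeding_list]
feeder = fluid.DataFeeder(feed_list=feed_vars, place=place)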