예제 #1
0
    def _net_conf(word_ids, target):
        """
        Configure the network.

        Builds, in order: a trainable word embedding, an ELMo embedding that
        is concatenated to it, ``bigru_num`` stacked ``_bigru_layer`` calls,
        a fully connected emission layer, and a linear-chain CRF on top.

        Args:
            word_ids: input variable fed to both the word-embedding lookup
                and ``elmo_encoder``.
            target: label variable passed to ``linear_chain_crf``.

        Returns:
            A tuple ``(avg_cost, crf_decode)``: the mean of the CRF cost and
            the output of ``crf_decoding``.
        """
        uniform_init = fluid.initializer.Uniform(
            low=-init_bound, high=init_bound)

        # Look up the embedding from the function's own argument rather than
        # from a name in the enclosing scope, so both embeddings see the same
        # input.
        word_embedding = fluid.layers.embedding(
            input=word_ids,
            size=[word_dict_len, word_emb_dim],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr=fluid.ParamAttr(
                learning_rate=emb_lr,
                name="word_emb",
                initializer=uniform_init))

        # add elmo embedding
        elmo_emb = elmo_encoder(word_ids, args.elmo_l2_coef)
        input_feature = layers.concat(input=[elmo_emb, word_embedding], axis=1)

        # Each layer consumes the previous layer's output.
        for _ in range(bigru_num):
            input_feature = _bigru_layer(input_feature)

        # `input_feature` is the last BiGRU output when bigru_num >= 1 and the
        # concatenated embeddings when bigru_num == 0, so this never touches
        # an unbound variable.
        emission = fluid.layers.fc(
            size=label_dict_len,
            input=input_feature,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))

        # Training cost and decoding both reference the parameter named
        # 'crfw'.
        crf_cost = fluid.layers.linear_chain_crf(
            input=emission,
            label=target,
            param_attr=fluid.ParamAttr(
                name='crfw', learning_rate=crf_lr))
        crf_decode = fluid.layers.crf_decoding(
            input=emission, param_attr=fluid.ParamAttr(name='crfw'))
        avg_cost = fluid.layers.mean(x=crf_cost)
        return avg_cost, crf_decode
예제 #2
0
def rc_model(hidden_size, vocab, args):
    """Build the reading-comprehension network and its training cost.

    The graph is assembled in stages: embed question and passage ids
    (optionally concatenating ELMo encoder output), encode both inside a
    ``DynamicRNN`` block, apply ``attn_flow`` and ``fusion``, then decode
    start/end probabilities with ``point_network_decoder``.

    Args:
        hidden_size: forwarded to ``encoder`` and ``point_network_decoder``.
        vocab: object providing ``size()`` and ``embed_dim``; these define
            the embedding table shape.
        args: configuration object. ``args.elmo`` toggles the ELMo branch;
            the object is also forwarded to the helper layers.

    Returns:
        A tuple ``(cost, start_probs, end_probs, ms, feeding_list)`` where
        ``cost`` is the sum of the mean start and end cross-entropy costs,
        ``ms`` is the first output of the dynamic RNN, and ``feeding_list``
        holds the names of the data layers to feed.
    """
    # Evaluate the switch once; `==` is kept on purpose so the accepted
    # values of args.elmo are exactly those of the original comparison.
    use_elmo = args.elmo == True  # noqa: E712

    emb_shape = [vocab.size(), vocab.embed_dim]
    # NOTE: the misspelled layer names ("lables") are feed names used at
    # runtime and must stay as they are.
    start_labels = layers.data(
        name="start_lables", shape=[1], dtype='float32', lod_level=1)
    end_labels = layers.data(
        name="end_lables", shape=[1], dtype='float32', lod_level=1)

    # stage 1: encode
    q_id0 = get_data('q_id0', 1, args)
    q_ids = get_data('q_ids', 2, args)
    p_ids_name = 'p_ids'
    p_ids = get_data('p_ids', 2, args)
    q_ids_elmo = get_data('q_ids_elmo', 2, args)
    p_ids_elmo = get_data('p_ids_elmo', 2, args)
    p_embs = embedding(p_ids, emb_shape, args)
    q_embs = embedding(q_ids, emb_shape, args)
    if use_elmo:
        q_embs_elmo = emb(q_ids_elmo)
        p_embs_elmo = emb(p_ids_elmo)

    drnn = layers.DynamicRNN()
    with drnn.block():
        p_emb = drnn.step_input(p_embs)
        q_emb = drnn.step_input(q_embs)
        if use_elmo:
            q_emb_elmo = drnn.step_input(q_embs_elmo)
            p_emb_elmo = drnn.step_input(p_embs_elmo)
            p_encs_elmo = elmo_encoder(p_emb_elmo)
            q_encs_elmo = elmo_encoder(q_emb_elmo)
            # Concatenate the ELMo *encoder outputs*; previously the raw
            # step inputs were concatenated and the encoder results were
            # computed but never used.
            p_emb = layers.concat(input=[p_emb, p_encs_elmo], axis=1)
            q_emb = layers.concat(input=[q_emb, q_encs_elmo], axis=1)

        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)
        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)

        # stage 2: attention flow between question and passage encodings
        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)
        # stage 3: fusion
        m_i = fusion(g_i, args)
        drnn.output(m_i, q_enc)

    ms, q_encs = drnn()
    # Re-apply the LoD of the label / q_id0 inputs to the RNN outputs.
    p_vec = layers.lod_reset(x=ms, y=start_labels)
    q_vec = layers.lod_reset(x=q_encs, y=q_id0)

    # stage 4: decode
    start_probs, end_probs = point_network_decoder(
        p_vec=p_vec, q_vec=q_vec, hidden_size=hidden_size, args=args)

    # Per-sequence summed soft-label cross entropy, averaged afterwards.
    cost0 = layers.sequence_pool(
        layers.cross_entropy(
            input=start_probs, label=start_labels, soft_label=True),
        'sum')
    cost1 = layers.sequence_pool(
        layers.cross_entropy(
            input=end_probs, label=end_labels, soft_label=True),
        'sum')

    cost0 = layers.mean(cost0)
    cost1 = layers.mean(cost1)
    cost = cost0 + cost1
    cost.persistable = True

    # The ELMo inputs are appended after the base feeds, matching the
    # original ordering.
    feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
    if use_elmo:
        feeding_list += ["q_ids_elmo", "p_ids_elmo"]
    return cost, start_probs, end_probs, ms, feeding_list