Example #1
def rc_model(hidden_size, vocab, args):
    emb_shape = [vocab.size(), vocab.embed_dim]
    start_labels = layers.data(name="start_lables",
                               shape=[1],
                               dtype='float32',
                               lod_level=1)
    end_labels = layers.data(name="end_lables",
                             shape=[1],
                             dtype='float32',
                             lod_level=1)

    # stage 1:encode
    q_id0 = get_data('q_id0', 1, args)

    q_ids = get_data('q_ids', 2, args)
    p_ids_name = 'p_ids'

    p_ids = get_data('p_ids', 2, args)
    p_embs = embedding(p_ids, emb_shape, args)
    q_embs = embedding(q_ids, emb_shape, args)
    drnn = layers.DynamicRNN()
    with drnn.block():
        p_emb = drnn.step_input(p_embs)
        q_emb = drnn.step_input(q_embs)

        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)
        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)

        # stage 2:match
        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)
        # stage 3:fusion
        m_i = fusion(g_i, args)
        drnn.output(m_i, q_enc)

    ms, q_encs = drnn()
    p_vec = layers.lod_reset(x=ms, y=start_labels)
    q_vec = layers.lod_reset(x=q_encs, y=q_id0)

    # stage 4:decode
    start_probs, end_probs = point_network_decoder(p_vec=p_vec,
                                                   q_vec=q_vec,
                                                   hidden_size=hidden_size,
                                                   args=args)

    cost0 = layers.sequence_pool(
        layers.cross_entropy(input=start_probs,
                             label=start_labels,
                             soft_label=True), 'sum')
    cost1 = layers.sequence_pool(
        layers.cross_entropy(input=end_probs,
                             label=end_labels,
                             soft_label=True), 'sum')

    cost0 = layers.mean(cost0)
    cost1 = layers.mean(cost1)
    cost = cost0 + cost1
    cost.persistable = True

    feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
    return cost, start_probs, end_probs, ms, feeding_list
Example #2
def attn_flow(q_enc, p_enc, p_ids_name, args):
    """Bidirectional Attention layer"""
    tag = p_ids_name + "__"
    drnn = layers.DynamicRNN()
    with drnn.block():
        h_cur = drnn.step_input(p_enc)
        u_all = drnn.static_input(q_enc)
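        # context-to-query attention: score the current passage step against every question step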
        h_expd = layers.sequence_expand(x=h_cur, y=u_all)
        s_t_mul = layers.elementwise_mul(x=u_all, y=h_expd, axis=0)
        s_t_sum = layers.reduce_sum(input=s_t_mul, dim=1, keep_dim=True)
        s_t_re = layers.reshape(s_t_sum, shape=[-1, 0])
        s_t = layers.sequence_softmax(input=s_t_re)
        u_expr = layers.elementwise_mul(x=u_all, y=s_t, axis=0)
        u_expr = layers.sequence_pool(input=u_expr, pool_type='sum')

        b_t = layers.sequence_pool(input=s_t_sum, pool_type='max')
        drnn.output(u_expr, b_t)
    U_expr, b = drnn()
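    # query-to-context attention: softmax over each passage step's max similarity, then a weighted sum of passage states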
    b_norm = layers.sequence_softmax(input=b)
    h_expr = layers.elementwise_mul(x=p_enc, y=b_norm, axis=0)
    h_expr = layers.sequence_pool(input=h_expr, pool_type='sum')

    H_expr = layers.sequence_expand(x=h_expr, y=p_enc)
    H_expr = layers.lod_reset(x=H_expr, y=p_enc)
    h_u = layers.elementwise_mul(x=p_enc, y=U_expr, axis=0)
    h_h = layers.elementwise_mul(x=p_enc, y=H_expr, axis=0)

    g = layers.concat(input=[p_enc, U_expr, h_u, h_h], axis=1)
    return dropout(g, args)
Example #3
def sequence_softmax(x, beta=None):
    """Compute sequence softmax over paddle LodTensor

    This function compute softmax normalization along with the length of sequence.
    This function is an extention of :code:`L.sequence_softmax` which can only
    deal with LodTensor whose last dimension is 1.

    Args:
        x: The input variable which is a LodTensor.
        beta: Inverse Temperature

    Return:
        Output of sequence_softmax
    """

    if beta is not None:
        x = x * beta

    x_max = L.sequence_pool(x, "max")
    x_max = L.sequence_expand_as(x_max, x)
    x = x - x_max
    exp_x = L.exp(x)
    sum_exp_x = L.sequence_pool(exp_x, "sum")
    sum_exp_x = L.sequence_expand_as(sum_exp_x, exp_x)
    return exp_x / sum_exp_x
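
For intuition, here is a standalone NumPy sketch (not part of the example above; the helper name and toy shapes are made up) of the same per-sequence, numerically stable softmax applied to a flattened (sum_of_lengths, dim) batch, with a plain `lengths` list standing in for the LoD:

import numpy as np

def sequence_softmax_np(x, lengths, beta=None):
    """Softmax over each sequence of a flattened (sum_len, dim) batch."""
    if beta is not None:
        x = x * beta
    out = np.empty_like(x)
    offset = 0
    for n in lengths:
        seg = x[offset:offset + n]
        seg = seg - seg.max(axis=0, keepdims=True)  # subtract the per-sequence max for stability
        e = np.exp(seg)
        out[offset:offset + n] = e / e.sum(axis=0, keepdims=True)
        offset += n
    return out

# two sequences of lengths 3 and 2, feature dimension 4
probs = sequence_softmax_np(np.random.randn(5, 4), lengths=[3, 2])
assert np.allclose(probs[:3].sum(axis=0), 1.0) and np.allclose(probs[3:].sum(axis=0), 1.0)
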
Example #4
    def build_model(self):
        node_features = self.graph_wrapper.node_feat["feat"]

        output = self.gcn(gw=self.graph_wrapper,
                          feature=node_features,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_1")
        output1 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_2")
        output2 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_3")

        output = L.concat(input=[output1, output2, output], axis=-1)

        output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                        feature=output,
                                        ratio=self.pooling_ratio,
                                        graph_id=self.graph_id,
                                        dataset=self.args.dataset_name,
                                        name="sag_pool_1")
        output = L.lod_reset(output, self.graph_wrapper.graph_lod)
        cat1 = L.sequence_pool(output, "sum")
        ratio_length = L.cast(ratio_length, dtype="float32")
        cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
        cat2 = L.sequence_pool(output, "max")
        output = L.concat(input=[cat2, cat1], axis=-1)

        output = L.fc(output, size=self.hidden_size, act="relu")
        output = L.dropout(output, dropout_prob=self.dropout_ratio)
        output = L.fc(output, size=self.hidden_size // 2, act="relu")
        output = L.fc(output,
                      size=self.num_classes,
                      act=None,
                      param_attr=fluid.ParamAttr(name="final_fc"))

        self.labels = L.cast(self.labels, dtype="float32")
        loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
        self.loss = L.mean(loss)
        pred = L.sigmoid(output)
        self.pred = L.argmax(x=pred, axis=-1)
        correct = L.equal(self.pred, self.labels_1dim)
        correct = L.cast(correct, dtype="int32")
        self.correct = L.reduce_sum(correct)
Example #5
        def __call__(self, msg):
            alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
            if attn_drop:
                old_h = alpha
                dropout = F.data(name='attn_drop', shape=[1], dtype="int64")
                u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                                     min=0.,
                                     max=1.)
                keeped = L.cast(u > dropout, dtype="float32")
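                # (keeped - 1) * 10000: kept positions add 0, dropped positions add -10000 to alpha before the softmax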
                self_attn_mask = L.scale(x=keeped,
                                         scale=10000.0,
                                         bias=-1.0,
                                         bias_after_scale=False)
                n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                                axis=1)
                n_head_self_attn_mask.stop_gradient = True
                alpha = n_head_self_attn_mask + alpha
                alpha = L.lod_reset(alpha, old_h)

            h = msg["v"]
            alpha = paddle_helper.sequence_softmax(alpha)

            self.alpha = alpha
            old_h = h
            h = h * alpha
            h = L.lod_reset(h, old_h)
            h = L.sequence_pool(h, "sum")

            if concat:
                h = L.reshape(h, [-1, num_heads * hidden_size])
            else:
                h = L.reduce_mean(h, dim=1)
            return h
Example #6
        def custom_dynamic_rnn(p_vec, init_state, decoder_size):
            context = layers.fc(input=p_vec,
                                size=decoder_size,
                                act=None)

            drnn = layers.DynamicRNN()
            with drnn.block():
                H_s = drnn.step_input(p_vec)
                ctx = drnn.static_input(context)

                c_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
                m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
                logits = layers.fc(input=Fk, size=1, act=None)

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
                hidden_t, cell_t = lstm_step(attn_ctx,
                                             hidden_t_prev=m_prev1,
                                             cell_t_prev=c_prev,
                                             size=decoder_size)

                drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
                drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

                drnn.output(scores)
            beta = drnn()
            return beta
Example #7
    def forward(self, is_test=False):
        """
        Build the network.
        """
        substruct_graph_wrapper = GraphWrapper(
            name="graph",
            node_feat=[('atom_type', [None, 1], "int64"),
                       ('chirality_tag', [None, 1], "int64")],
            edge_feat=[('bond_type', [None, 1], "int64"),
                       ('bond_direction', [None, 1], "int64")])
        context_graph_wrapper = GraphWrapper(
            name="context_graph",
            node_feat=[('atom_type', [None, 1], "int64"),
                       ('chirality_tag', [None, 1], "int64")],
            edge_feat=[('bond_type', [None, 1], "int64"),
                       ('bond_direction', [None, 1], "int64")])
        substruct_center_idx = layers.data(name="substruct_center_idx",
                                           shape=[-1, 1],
                                           dtype="int64")
        context_overlap_idx = layers.data(name="context_overlap_idx",
                                          shape=[-1, 1],
                                          dtype="int64")
        context_overlap_lod = layers.data(name="context_overlap_lod",
                                          shape=[1, -1],
                                          dtype="int32")
        context_cycle_index = layers.data(name="context_cycle_index",
                                          shape=[-1, 1],
                                          dtype="int64")

        substruct_node_repr = self.substruct_model.forward(
            substruct_graph_wrapper, is_test=is_test)
        substruct_repr = layers.gather(substruct_node_repr,
                                       substruct_center_idx)

        context_node_repr = self.context_model.forward(context_graph_wrapper,
                                                       is_test=is_test)
        context_overlap_repr = layers.gather(context_node_repr,
                                             context_overlap_idx)
        context_repr = layers.sequence_pool(
            layers.lod_reset(context_overlap_repr, context_overlap_lod),
            self.context_pooling)
        neg_context_repr = layers.gather(context_repr, context_cycle_index)

        pred_pos = layers.reduce_sum(substruct_repr * context_repr, 1)
        pred_neg = layers.reduce_sum(substruct_repr * neg_context_repr, 1)
        label_pos = pred_pos * 0.0 + 1.0
        label_pos.stop_gradient = True
        label_neg = pred_neg * 0.0
        label_neg.stop_gradient = True

        loss = layers.sigmoid_cross_entropy_with_logits(x=pred_pos, label=label_pos) \
                + layers.sigmoid_cross_entropy_with_logits(x=pred_neg, label=label_neg)
        loss = layers.reduce_mean(loss)

        self.substruct_graph_wrapper = substruct_graph_wrapper
        self.context_graph_wrapper = context_graph_wrapper
        self.loss = loss
Example #8
    def _build_net(self):

        self.pool1 = layers.sequence_pool(input=self.src_emb,
                                          pool_type="average")

        self.output = layers.fc(self.pool1, 2)
        self.output = layers.softmax(self.output)

        return self.output
Example #9
def graphsage_sum(feature, gw, hidden_size, name, act):
    msg = gw.send(lambda s, d, e: s["h"], nfeat_list=[("h", feature)])
    neigh_feature = gw.recv(
        msg, lambda feat: L.sequence_pool(feat, pool_type="sum"))

    hidden_size = hidden_size
    self_feature = linear(feature, hidden_size, name + "_l", act)
    neigh_feature = linear(neigh_feature, hidden_size, name + "_r", act)
    output = L.concat([self_feature, neigh_feature], axis=1)
    output = L.l2_normalize(output, axis=1)
    return output
Example #10
def get_mov_combined_features():

    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1

    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')

    mov_emb = layers.embedding(input=mov_id,
                               dtype='float32',
                               size=[MOV_DICT_SIZE, 32],
                               param_attr='movie_table',
                               is_sparse=IS_SPARSE)

    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())

    category_id = layers.data(name='category_id',
                              shape=[1],
                              dtype='int64',
                              lod_level=1)

    mov_categories_emb = layers.embedding(input=category_id,
                                          size=[CATEGORY_DICT_SIZE, 32],
                                          is_sparse=IS_SPARSE)

    mov_categories_hidden = layers.sequence_pool(input=mov_categories_emb,
                                                 pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())

    mov_title_id = layers.data(name='movie_title',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)

    mov_title_emb = layers.embedding(input=mov_title_id,
                                     size=[MOV_TITLE_DICT_SIZE, 32],
                                     is_sparse=IS_SPARSE)

    # The movie title is a sequence of integers; each integer is the index of the word in the dictionary.
    # This sequence is fed into the sequence_conv_pool layer, which applies convolution and pooling along the time dimension.
    # Because of this, the output has a fixed length, even though the input sequences vary in length.
    mov_title_conv = nets.sequence_conv_pool(input=mov_title_emb,
                                             num_filters=32,
                                             filter_size=3,
                                             act="tanh",
                                             pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")

    return mov_combined_features
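
To see why the pooled convolution above produces a fixed-length vector, here is a small standalone NumPy sketch (illustrative only; the window size 3 mirrors `filter_size` above, all other names and shapes are made up) of a window-3 convolution over time followed by sum pooling:

import numpy as np

def conv_pool_over_time(seq_emb, weight, pool="sum"):
    """seq_emb: (seq_len, emb_dim); weight: (3 * emb_dim, num_filters)."""
    seq_len, emb_dim = seq_emb.shape
    padded = np.vstack([np.zeros((1, emb_dim)), seq_emb, np.zeros((1, emb_dim))])
    windows = np.stack([padded[i:i + 3].reshape(-1) for i in range(seq_len)])
    feat = np.tanh(windows @ weight)               # (seq_len, num_filters)
    return feat.sum(axis=0) if pool == "sum" else feat.max(axis=0)

w = np.random.randn(3 * 8, 32)
short = conv_pool_over_time(np.random.randn(4, 8), w)
long_ = conv_pool_over_time(np.random.randn(11, 8), w)
assert short.shape == long_.shape == (32,)         # fixed-size output either way
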
Example #11
    def recv_func(message):
        # feature of the destination node of each edge
        dst_feat = message['dst_node_feat']
        # feature of the source node of each edge
        src_feat = message['src_node_feat']
        # each center node's own feature
        x = L.sequence_pool(dst_feat, 'average')
        # average of the features of each center node's neighbors
        z = L.sequence_pool(src_feat, 'average')

        # compute the gate
        feat_gate = message['feat_gate']
        g_max = L.sequence_pool(feat_gate, 'max')
        g = L.concat([x, g_max, z], axis=1)
        g = L.fc(g, heads, bias_attr=False, act="sigmoid")

        # softmax
        alpha = message['alpha']
        alpha = paddle_helper.sequence_softmax(alpha)  # E * M

        feat_value = message['feat_value']  # E * (M * D2)
        old = feat_value
        feat_value = L.reshape(feat_value,
                               [-1, heads, hidden_size_v])  # E * M * D2
        feat_value = L.elementwise_mul(feat_value, alpha, axis=0)
        feat_value = L.reshape(feat_value,
                               [-1, heads * hidden_size_v])  # E * (M * D2)
        feat_value = L.lod_reset(feat_value, old)

        feat_value = L.sequence_pool(feat_value, 'sum')  # N * (M * D2)

        feat_value = L.reshape(feat_value,
                               [-1, heads, hidden_size_v])  # N * M * D2

        output = L.elementwise_mul(feat_value, g, axis=0)
        output = L.reshape(output, [-1, heads * hidden_size_v])  # N * (M * D2)

        output = L.concat([x, output], axis=1)

        return output
Example #12
def get_mov_combined_features():

    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1

    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')

    mov_emb = layers.embedding(input=mov_id,
                               dtype='float32',
                               size=[MOV_DICT_SIZE, 32],
                               param_attr='movie_table',
                               is_sparse=IS_SPARSE)

    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())

    category_id = layers.data(name='category_id',
                              shape=[1],
                              dtype='int64',
                              lod_level=1)

    mov_categories_emb = layers.embedding(input=category_id,
                                          size=[CATEGORY_DICT_SIZE, 32],
                                          is_sparse=IS_SPARSE)

    mov_categories_hidden = layers.sequence_pool(input=mov_categories_emb,
                                                 pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())

    mov_title_id = layers.data(name='movie_title',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)

    mov_title_emb = layers.embedding(input=mov_title_id,
                                     size=[MOV_TITLE_DICT_SIZE, 32],
                                     is_sparse=IS_SPARSE)

    mov_title_conv = nets.sequence_conv_pool(input=mov_title_emb,
                                             num_filters=32,
                                             filter_size=3,
                                             act="tanh",
                                             pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    # FIXME(dzh) : need tanh operator
    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")

    return mov_combined_features
Example #13
 def recv_func(message):
     nt = message["nt"]
     att = message["att"]
     h = message["h"]
     output_h = []
     for i in range(2):
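         # keep only messages whose node type equals i; other entries get -10000 before the softmax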
         mask = L.cast(nt == i, dtype="float32") 
         rel_att = att[:, i:i+1] + ( 1 - mask ) * -10000
         rel_att = paddle_helper.sequence_softmax(rel_att)
         rel_h = L.sequence_pool(h * rel_att * mask, "sum")
         output_h.append(rel_h)
     output_h = L.concat(output_h, -1)
     return output_h
Example #14
        def static_rnn(step,
                       p_vec=p_vec,
                       init_state=None,
                       para_name='',
                       args=args):
            tag = para_name + "static_rnn_"
            ctx = layers.fc(
                input=p_vec,
                param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
                bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
                size=hidden_size,
                act=None)

            beta = []
            c_prev = init_state
            m_prev = init_state
            for i in range(step):
                m_prev0 = layers.fc(
                    input=m_prev,
                    size=hidden_size,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
                m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.tanh(Fk)
                logits = layers.fc(
                    input=Fk,
                    size=1,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev,
                    cell_t_prev=c_prev,
                    size=hidden_size,
                    para_name=tag,
                    args=args)
                m_prev = hidden_t
                c_prev = cell_t
                beta.append(scores)
            return beta
Example #15
File: conv.py Project: zzs95/PGL
    def recv_func(message):
        # feature of src and dst node on each edge
        dst_feat = message['dst_node_feat']
        src_feat = message['src_node_feat']
        # feature of center node
        x = L.sequence_pool(dst_feat, 'average')
        # feature of neighbors of center node
        z = L.sequence_pool(src_feat, 'average')

        # compute gate
        feat_gate = message['feat_gate']
        g_max = L.sequence_pool(feat_gate, 'max')
        g = L.concat([x, g_max, z], axis=1)
        g = L.fc(g, heads, bias_attr=False, act="sigmoid")

        # softmax
        alpha = message['alpha']
        alpha = paddle_helper.sequence_softmax(alpha) # E * M

        feat_value = message['feat_value'] # E * (M * D2)
        old = feat_value
        feat_value = L.reshape(feat_value, [-1, heads, hidden_size_v]) # E * M * D2
        feat_value = L.elementwise_mul(feat_value, alpha, axis=0)
        feat_value = L.reshape(feat_value, [-1, heads*hidden_size_v]) # E * (M * D2)
        feat_value = L.lod_reset(feat_value, old)

        feat_value = L.sequence_pool(feat_value, 'sum') # N * (M * D2)

        feat_value = L.reshape(feat_value, [-1, heads, hidden_size_v]) # N * M * D2

        output = L.elementwise_mul(feat_value, g, axis=0)
        output = L.reshape(output, [-1, heads * hidden_size_v]) # N * (M * D2)

        output = L.concat([x, output], axis=1)

        return output
Example #16
def get_mov_combined_features():

    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1

    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')

    mov_emb = layers.embedding(
        input=mov_id,
        dtype='float32',
        size=[MOV_DICT_SIZE, 32],
        param_attr='movie_table',
        is_sparse=IS_SPARSE)

    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())

    category_id = layers.data(
        name='category_id', shape=[1], dtype='int64', lod_level=1)

    mov_categories_emb = layers.embedding(
        input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)

    mov_categories_hidden = layers.sequence_pool(
        input=mov_categories_emb, pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())

    mov_title_id = layers.data(
        name='movie_title', shape=[1], dtype='int64', lod_level=1)

    mov_title_emb = layers.embedding(
        input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)

    mov_title_conv = nets.sequence_conv_pool(
        input=mov_title_emb,
        num_filters=32,
        filter_size=3,
        act="tanh",
        pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    # FIXME(dzh) : need tanh operator
    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")

    return mov_combined_features
Example #17
 def reduce_attention(msg):
     alpha = msg["alpha"]  # lod-tensor (batch_size, seq_len, num_heads)
     h = msg["h"]
     alpha = paddle_helper.sequence_softmax(alpha)
     old_h = h
     h = L.reshape(h, [-1, num_heads, hidden_size])
     alpha = L.reshape(alpha, [-1, num_heads, 1])
     if attn_drop > 1e-15:
         alpha = L.dropout(alpha,
                           dropout_prob=attn_drop,
                           is_test=is_test,
                           dropout_implementation="upscale_in_train")
     h = h * alpha
     h = L.reshape(h, [-1, num_heads * hidden_size])
     h = L.lod_reset(h, old_h)
     return L.sequence_pool(h, "sum")
Example #18
    def recv_score_v_spmm(msg):
        score = msg["score"]
        score = paddle_helper.sequence_softmax(score)
        score = layers.dropout(score,
                               dropout_prob=dropout_rate,
                               dropout_implementation="upscale_in_train",
                               is_test=False)

        score = L.reshape(score, [-1, n_head, 1])
        _v = msg["value"]
        _new_v = L.reshape(_v, [-1, n_head, _v.shape[-1] // n_head])

        _new_v = _new_v * score

        _new_v = L.reshape(_new_v, [-1, _v.shape[-1]])
        _new_v = L.lod_reset(_new_v, _v)
        return L.sequence_pool(_new_v, "sum")
Example #19
def graph_pooling(gw, node_feat, pool_type):
    """Implementation of graph pooling 

    This is an implementation of graph pooling

    Args:
        gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)

        node_feat: A tensor with shape (num_nodes, feature_size).

        pool_type: The type of pooling ("sum", "average" , "min")

    Return:
        A tensor with shape (num_graph, hidden_size)
    """
    graph_feat = op.nested_lod_reset(node_feat, gw.graph_lod)
    graph_feat = L.sequence_pool(graph_feat, pool_type)
    return graph_feat
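
As a rough illustration (assuming, as in PGL, that `graph_lod` holds cumulative node offsets per graph; the toy data below is made up), the same pooling can be written in plain NumPy:

import numpy as np

def graph_pooling_np(node_feat, graph_lod, pool_type="sum"):
    """Pool (num_nodes, feat) rows into one row per graph using cumulative offsets."""
    pooled = []
    for start, end in zip(graph_lod[:-1], graph_lod[1:]):
        seg = node_feat[start:end]
        pooled.append(seg.sum(axis=0) if pool_type == "sum" else seg.mean(axis=0))
    return np.stack(pooled)

# two graphs with 2 and 4 nodes, feature size 2
node_feat = np.arange(12, dtype=np.float32).reshape(6, 2)
print(graph_pooling_np(node_feat, graph_lod=[0, 2, 6], pool_type="average"))
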
Example #20
    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
        char_num = self.char_num
        word_vector_dim = self.word_vector_dim
        decoder_size = self.decoder_size

        if self.encoder_type == "reshape":
            encoder_input = encoder_features
            encoded_vector = encoder_features
        else:
            encoder_input = encoder_features[1]
            encoded_vector = layers.concat(encoder_features, axis=1)
        encoded_proj = layers.fc(input=encoded_vector,
                                 size=decoder_size,
                                 bias_attr=False,
                                 name="encoded_proj_fc")
        backward_first = layers.sequence_pool(
            input=encoder_input, pool_type='first')
        decoder_boot = layers.fc(input=backward_first,
                                 size=decoder_size,
                                 bias_attr=False,
                                 act="relu",
                                 name='decoder_boot')

        if mode == "train":
            label_in = labels['label_in']
            label_out = labels['label_out']
            label_in = layers.cast(x=label_in, dtype='int64')
            trg_embedding = layers.embedding(
                input=label_in,
                size=[char_num, word_vector_dim],
                dtype='float32')
            predict = self.gru_decoder_with_attention(
                trg_embedding, encoded_vector, encoded_proj, decoder_boot,
                decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
            predicts = {'predict': predict, 'decoded_out': decoded_out}
        else:
            ids = self.gru_attention_infer(
                decoder_boot, self.max_length, char_num, word_vector_dim,
                encoded_vector, encoded_proj, decoder_size)
            predicts = {'decoded_out': ids}
        return predicts
Example #21
 def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                      decoder_size):
     decoder_state_proj = layers.fc(input=decoder_state,
                                    size=decoder_size,
                                    bias_attr=False,
                                    name="decoder_state_proj_fc")
     decoder_state_expand = layers.sequence_expand(
         x=decoder_state_proj, y=encoder_proj)
     concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
     concated = layers.tanh(x=concated)
     attention_weights = layers.fc(input=concated,
                                   size=1,
                                   act=None,
                                   bias_attr=False,
                                   name="attention_weights_fc")
     attention_weights = layers.sequence_softmax(input=attention_weights)
     weigths_reshape = layers.reshape(x=attention_weights, shape=[-1])
     scaled = layers.elementwise_mul(
         x=encoder_vec, y=weigths_reshape, axis=0)
     context = layers.sequence_pool(input=scaled, pool_type='sum')
     return context
Example #22
def max_recv(feat):
    """tbd"""
    return layers.sequence_pool(feat, pool_type="max")
Example #23
def sum_recv(feat):
    """tbd"""
    return layers.sequence_pool(feat, pool_type="sum")
Example #24
def mean_recv(feat):
    """tbd"""
    return layers.sequence_pool(feat, pool_type="average")
Example #25
def max_recv(feat):
    return L.sequence_pool(feat, pool_type="max")
Example #26
def point_network_decoder(p_vec, q_vec, hidden_size, args):
    """Output layer - pointer network"""
    tag = 'pn_decoder_'
    init_random = fluid.initializer.Normal(loc=0.0, scale=1.0)

    random_attn = layers.create_parameter(
        shape=[1, hidden_size],
        dtype='float32',
        default_initializer=init_random)
    random_attn = layers.fc(
        input=random_attn,
        size=hidden_size,
        act=None,
        param_attr=fluid.ParamAttr(name=tag + 'random_attn_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'random_attn_fc_b'))
    random_attn = layers.reshape(random_attn, shape=[-1])
    U = layers.fc(input=q_vec,
                  param_attr=fluid.ParamAttr(name=tag + 'q_vec_fc_w'),
                  bias_attr=False,
                  size=hidden_size,
                  act=None) + random_attn
    U = layers.tanh(U)

    logits = layers.fc(input=U,
                       param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                       bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'),
                       size=1,
                       act=None)
    scores = layers.sequence_softmax(input=logits)
    pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
    pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')

    init_state = layers.fc(
        input=pooled_vec,
        param_attr=fluid.ParamAttr(name=tag + 'init_state_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'init_state_fc_b'),
        size=hidden_size,
        act=None)

    def custom_dynamic_rnn(p_vec, init_state, hidden_size, para_name, args):
        tag = para_name + "custom_dynamic_rnn_"

        def static_rnn(step,
                       p_vec=p_vec,
                       init_state=None,
                       para_name='',
                       args=args):
            tag = para_name + "static_rnn_"
            ctx = layers.fc(
                input=p_vec,
                param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
                bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
                size=hidden_size,
                act=None)

            beta = []
            c_prev = init_state
            m_prev = init_state
            for i in range(step):
                m_prev0 = layers.fc(
                    input=m_prev,
                    size=hidden_size,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
                m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.tanh(Fk)
                logits = layers.fc(
                    input=Fk,
                    size=1,
                    act=None,
                    param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
                    bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev,
                    cell_t_prev=c_prev,
                    size=hidden_size,
                    para_name=tag,
                    args=args)
                m_prev = hidden_t
                c_prev = cell_t
                beta.append(scores)
            return beta

        return static_rnn(
            2, p_vec=p_vec, init_state=init_state, para_name=para_name)

    fw_outputs = custom_dynamic_rnn(p_vec, init_state, hidden_size, tag + "fw_",
                                    args)
    bw_outputs = custom_dynamic_rnn(p_vec, init_state, hidden_size, tag + "bw_",
                                    args)

    start_prob = layers.elementwise_add(
        x=fw_outputs[0], y=bw_outputs[1], axis=0) / 2
    end_prob = layers.elementwise_add(
        x=fw_outputs[1], y=bw_outputs[0], axis=0) / 2

    return start_prob, end_prob
Example #27
from __future__ import print_function
import paddle.fluid as fluid
import paddle.fluid.layers as layers

####################
# original program #
####################
main_prog = fluid.Program()
start_prog = fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    slot = fluid.data('slot', [-1, 1], dtype='int64', lod_level=1)
    label = fluid.data('label', [-1, 1])
    emb = layers.embedding(slot, [4, 12],
                           param_attr=fluid.ParamAttr(name="emb"))
    pool = layers.sequence_pool(emb, 'sum')
    fc = layers.fc(pool, 12, act='relu')
    logit = layers.fc(fc, 1)
    loss = layers.sigmoid_cross_entropy_with_logits(logit, label)

exe = fluid.Executor(fluid.CUDAPlace(0))
# if no GPU is available, use the statement below instead:
# exe = fluid.Executor(fluid.CPUPlace())

# initialize all parameters
exe.run(start_prog)

fluid.io.save_persistables(exe, dirname="model", main_program=main_prog)

# show all parameters in the original model
param_names = {var.name for var in main_prog.list_vars() if var.persistable}
print(param_names)
Example #28
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

slot = fluid.data('slot', [-1, 1], dtype='int64', lod_level=1)
ones = layers.ones_like(slot)
float_ones = layers.cast(ones, dtype='float32')
value = layers.sequence_pool(float_ones, pool_type='sum')

feed_list = {
    'slot':
    fluid.create_lod_tensor(np.array([[0], [1], [2], [3], [4]], dtype='int64'),
                            [[3, 2]], fluid.CPUPlace())
}
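# the LoD [[3, 2]] above marks two sequences of lengths 3 and 2, so the fetched sums should come back as [[3.], [2.]]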
fetch_list = [value]
exe = fluid.Executor(fluid.CPUPlace())
result = exe.run(fluid.default_main_program(),
                 feed=feed_list,
                 fetch_list=fetch_list)
print('sequence length:', result)
Example #29
def mean_recv(feat):
    return L.sequence_pool(feat, pool_type="average")
Example #30
def _is_backward_op(op, op_role_key):
    return op_role_key in op.attr_names and \
        int(op.all_attrs()[op_role_key]) & int(OpRole.Backward)


avgw_list = []
# custom main program and start program
main_program = fluid.Program()
start_program = fluid.Program()
with fluid.program_guard(main_program, start_program):
    # build the network
    slot = fluid.data('slot', [-1, 1], dtype='int64', lod_level=1)
    label = fluid.data('label', [-1, 1])
    emb = layers.embedding(slot, [5, 12])
    pool = layers.sequence_pool(emb, 'sum')
    logit = layers.fc(pool, 1)
    loss = layers.sigmoid_cross_entropy_with_logits(logit, label)
    avg_cost = layers.mean(loss)

    # define the optimizer
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    sgd_optimizer.minimize(avg_cost)

    decay_var = layers.fill_constant(shape=[1], value=0.9, dtype='float32')
    rev_decay_var = layers.fill_constant(shape=[1], value=0.1, dtype='float32')

    block = main_program.global_block()
    op_maker = core.op_proto_and_checker_maker
    op_role_key = op_maker.kOpRoleAttrName()  # "op_role"
    op_role_var_key = op_maker.kOpRoleVarAttrName()  # "op_role_var"
Example #31
def sum_recv(feat):
    return L.sequence_pool(feat, pool_type="sum")