Example #1
    def recv_func(self, message):
        dst_feat = message['dst_node_feat']
        src_feat = message['src_node_feat']
        x = fluid.layers.sequence_pool(dst_feat, 'average')
        z = fluid.layers.sequence_pool(src_feat, 'average')

        feat_gate = message['feat_gate']
        g_max = fluid.layers.sequence_pool(feat_gate, 'max')

        g = fluid.layers.concat([x, g_max, z], axis=1)
        g = fluid.layers.fc(g, self.heads, bias_attr=False, act="sigmoid")

        # softmax
        alpha = message['alpha']
        alpha = paddle_helper.sequence_softmax(alpha)  # E * M

        feat_value = message['feat_value']  # E * (M * D2)
        old = feat_value
        feat_value = fluid.layers.reshape(
            feat_value, [-1, self.heads, self.hidden_size_v])  # E * M * D2
        feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
        feat_value = fluid.layers.reshape(
            feat_value, [-1, self.heads * self.hidden_size_v])  # E * (M * D2)
        feat_value = fluid.layers.lod_reset(feat_value, old)

        feat_value = fluid.layers.sequence_pool(feat_value,
                                                'sum')  # N * (M * D2)
        feat_value = fluid.layers.reshape(
            feat_value, [-1, self.heads, self.hidden_size_v])  # N * M * D2
        output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
        output = fluid.layers.reshape(
            output, [-1, self.heads * self.hidden_size_v])  # N * (M * D2)
        output = fluid.layers.concat([x, output], axis=1)

        return output
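A note for navigation: Examples #1, #7, #8, and #9 below are variants of the same gated multi-head attention receiver. Per node, a gate g = sigmoid(fc([x, g_max, z])) is computed from the node's own feature x, the max-pooled gate features of its neighbors g_max, and their mean feature z; the gate then scales each head of the attention-pooled neighbor features before they are concatenated back onto x.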
Example #2
        def __call__(self, msg):
            alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
            if attn_drop:
                old_h = alpha
                # runtime drop probability, fed in as a float32 scalar
                dropout = F.data(name='attn_drop', shape=[1], dtype="float32")
                # one uniform sample per edge; an edge is kept if u > dropout
                u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                                     min=0.,
                                     max=1.)
                keeped = L.cast(u > dropout, dtype="float32")
                # (keeped - 1) * 10000: 0 for kept edges, -10000 for dropped
                self_attn_mask = L.scale(x=keeped,
                                         scale=10000.0,
                                         bias=-1.0,
                                         bias_after_scale=False)
                # broadcast the per-edge mask over all attention heads
                n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                                axis=1)
                n_head_self_attn_mask.stop_gradient = True
                # dropped edges get a large negative logit and vanish
                # in the softmax below
                alpha = n_head_self_attn_mask + alpha
                alpha = L.lod_reset(alpha, old_h)

            h = msg["v"]
            alpha = paddle_helper.sequence_softmax(alpha)

            self.alpha = alpha
            old_h = h
            h = h * alpha
            h = L.lod_reset(h, old_h)
            h = L.sequence_pool(h, "sum")

            if concat:
                h = L.reshape(h, [-1, num_heads * hidden_size])
            else:
                h = L.reduce_mean(h, dim=1)
            return h
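A note on Example #2: it implements attention dropout by hand, masking a random subset of edges before the softmax. Examples #4 and #6 below take the simpler route of applying L.dropout to the normalized scores. A minimal sketch of that alternative, assuming attn_drop and is_test are in scope as they are there:

    alpha = paddle_helper.sequence_softmax(alpha)
    # drop whole edge scores; "upscale_in_train" rescales the kept
    # scores so their expected sum is unchanged
    alpha = L.dropout(alpha,
                      dropout_prob=attn_drop,
                      is_test=is_test,
                      dropout_implementation="upscale_in_train")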
Example #3
    def recv_func(message):
        nt = message["nt"]    # node type of the sender of each edge
        att = message["att"]  # one attention logit per node type
        h = message["h"]      # source-node features
        output_h = []
        # aggregate neighbors of each of the two node types separately
        for i in range(2):
            mask = L.cast(nt == i, dtype="float32")
            # push edges of the other type to -10000 so they vanish
            # in the softmax
            rel_att = att[:, i:i+1] + (1 - mask) * -10000
            rel_att = paddle_helper.sequence_softmax(rel_att)
            rel_h = L.sequence_pool(h * rel_att * mask, "sum")
            output_h.append(rel_h)
        # concatenate the per-type aggregations
        output_h = L.concat(output_h, -1)
        return output_h
Example #4
    def reduce_attention(msg):
        alpha = msg["alpha"]  # lod-tensor (num_edges, num_heads)
        h = msg["h"]
        # normalize attention logits over each node's incoming edges
        alpha = paddle_helper.sequence_softmax(alpha)
        old_h = h
        h = L.reshape(h, [-1, num_heads, hidden_size])
        alpha = L.reshape(alpha, [-1, num_heads, 1])
        if attn_drop > 1e-15:
            alpha = L.dropout(alpha,
                              dropout_prob=attn_drop,
                              is_test=is_test,
                              dropout_implementation="upscale_in_train")
        # weight each edge message by its per-head attention score
        h = h * alpha
        h = L.reshape(h, [-1, num_heads * hidden_size])
        h = L.lod_reset(h, old_h)
        return L.sequence_pool(h, "sum")
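Reduce functions like the one above receive the dict returned by a matching send function. A minimal send-side sketch in the style of PGL's GAT layer; left_a and right_a (per-head attention terms precomputed on the source and destination nodes) are assumptions, not part of the example above:

    def send_attention(src_feat, dst_feat, edge_feat):
        # per-edge attention logit: learned source term plus learned
        # destination term, passed through a leaky relu
        output = src_feat["left_a"] + dst_feat["right_a"]
        output = fluid.layers.leaky_relu(output, alpha=0.2)
        return {"alpha": output, "h": src_feat["h"]}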
Example #5
    def recv_score_v_spmm(msg):
        score = msg["score"]
        score = paddle_helper.sequence_softmax(score)
        score = L.dropout(score,
                          dropout_prob=dropout_rate,
                          dropout_implementation="upscale_in_train",
                          is_test=False)

        score = L.reshape(score, [-1, n_head, 1])
        _v = msg["value"]
        _new_v = L.reshape(_v, [-1, n_head, _v.shape[-1] // n_head])

        _new_v = _new_v * score

        _new_v = L.reshape(_new_v, [-1, _v.shape[-1]])
        _new_v = L.lod_reset(_new_v, _v)
        return L.sequence_pool(_new_v, "sum")
Example #6
    def reduce_attention(msg):
        alpha = msg["alpha"]  # lod-tensor (batch_size, seq_len, num_heads)
        h = msg["v"]
        alpha = paddle_helper.sequence_softmax(alpha)
        old_h = h

        if attn_drop > 1e-15:
            alpha = fluid.layers.dropout(
                alpha,
                dropout_prob=attn_drop,
                is_test=is_test,
                dropout_implementation="upscale_in_train")
        h = h * alpha
        h = fluid.layers.lod_reset(h, old_h)
        h = fluid.layers.sequence_pool(h, "sum")
        if concat:
            h = fluid.layers.reshape(h, [-1, num_heads * hidden_size])
        else:
            h = fluid.layers.reduce_mean(h, dim=1)
        return h
Example #7
    def recv_func(message):
        # feature of src and dst node on each edge
        dst_feat = message['dst_node_feat']
        src_feat = message['src_node_feat']
        # feature of center node
        x = fluid.layers.sequence_pool(dst_feat, 'average')
        # feature of neighbors of center node
        z = fluid.layers.sequence_pool(src_feat, 'average')

        # compute gate
        feat_gate = message['feat_gate']
        g_max = fluid.layers.sequence_pool(feat_gate, 'max')
        g = fluid.layers.concat([x, g_max, z], axis=1)
        g = fluid.layers.fc(g, heads, bias_attr=False, act="sigmoid")

        # softmax
        alpha = message['alpha']
        alpha = paddle_helper.sequence_softmax(alpha)  # E * M

        feat_value = message['feat_value']  # E * (M * D2)
        old = feat_value
        feat_value = fluid.layers.reshape(
            feat_value, [-1, heads, hidden_size_v])  # E * M * D2
        feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
        feat_value = fluid.layers.reshape(
            feat_value, [-1, heads * hidden_size_v])  # E * (M * D2)
        feat_value = fluid.layers.lod_reset(feat_value, old)

        feat_value = fluid.layers.sequence_pool(feat_value,
                                                'sum')  # N * (M * D2)

        feat_value = fluid.layers.reshape(
            feat_value, [-1, heads, hidden_size_v])  # N * M * D2

        output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
        output = fluid.layers.reshape(
            output, [-1, heads * hidden_size_v])  # N * (M * D2)

        output = fluid.layers.concat([x, output], axis=1)

        return output
Example #8
    def recv_func(message):
        # feature of the destination node of each edge
        dst_feat = message['dst_node_feat']
        # feature of the source node of each edge
        src_feat = message['src_node_feat']
        # each center node's own feature
        x = L.sequence_pool(dst_feat, 'average')
        # mean feature of each center node's neighbors
        z = L.sequence_pool(src_feat, 'average')

        # compute the gate
        feat_gate = message['feat_gate']
        g_max = L.sequence_pool(feat_gate, 'max')
        g = L.concat([x, g_max, z], axis=1)
        g = L.fc(g, heads, bias_attr=False, act="sigmoid")

        # softmax
        alpha = message['alpha']
        alpha = paddle_helper.sequence_softmax(alpha)  # E * M

        feat_value = message['feat_value']  # E * (M * D2)
        old = feat_value
        feat_value = L.reshape(feat_value,
                               [-1, heads, hidden_size_v])  # E * M * D2
        feat_value = L.elementwise_mul(feat_value, alpha, axis=0)
        feat_value = L.reshape(feat_value,
                               [-1, heads * hidden_size_v])  # E * (M * D2)
        feat_value = L.lod_reset(feat_value, old)

        feat_value = L.sequence_pool(feat_value, 'sum')  # N * (M * D2)

        feat_value = L.reshape(feat_value,
                               [-1, heads, hidden_size_v])  # N * M * D2

        output = L.elementwise_mul(feat_value, g, axis=0)
        output = L.reshape(output, [-1, heads * hidden_size_v])  # N * (M * D2)

        output = L.concat([x, output], axis=1)

        return output
Example #9
    def recv_func(message):
        dst_feat = message[
            'dst_node_feat']  # feature of dst nodes on each edge
        src_feat = message[
            'src_node_feat']  # feature of src nodes on each edge
        x = fluid.layers.sequence_pool(dst_feat,
                                       'average')  # feature of center nodes
        z = fluid.layers.sequence_pool(src_feat,
                                       'average')  # mean feature of neighbors

        # compute gate
        feat_gate = message['feat_gate']
        g_max = fluid.layers.sequence_pool(feat_gate, 'max')
        g = fluid.layers.concat([x, g_max, z], axis=1)
        g = fluid.layers.fc(g, heads, bias_attr=False, act='sigmoid')

        # softmax of attention coefficient
        alpha = message['alpha']
        alpha = paddle_helper.sequence_softmax(alpha)

        feat_value = message['feat_value']
        old = feat_value
        feat_value = fluid.layers.reshape(feat_value,
                                          [-1, heads, hidden_size_v])
        feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
        feat_value = fluid.layers.reshape(feat_value,
                                          [-1, heads * hidden_size_v])
        feat_value = fluid.layers.lod_reset(feat_value, old)

        feat_value = fluid.layers.sequence_pool(feat_value, 'sum')
        feat_value = fluid.layers.reshape(feat_value,
                                          [-1, heads, hidden_size_v])
        output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
        output = fluid.layers.reshape(output, [-1, heads * hidden_size_v])
        output = fluid.layers.concat([x, output], axis=1)

        return output
Example #10
    def softmax_agg_inside(msg):
        # softmax (with temperature beta) over each node's incoming
        # messages, then a weighted sum
        alpha = paddle_helper.sequence_softmax(msg, beta)
        msg = msg * alpha
        return fluid.layers.sequence_pool(msg, "sum")
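All of these reduce functions are meant to be passed to PGL's recv after a matching send. A minimal wiring sketch for Example #10, assuming PGL's static-graph GraphWrapper API; gw (a pgl.graph_wrapper.GraphWrapper) and feature (a node-feature tensor) are placeholders not defined in the example:

    def send_src_copy(src_feat, dst_feat, edge_feat):
        # copy each source node's feature onto its outgoing edge
        return src_feat["h"]

    msg = gw.send(send_src_copy, nfeat_list=[("h", feature)])
    output = gw.recv(msg, softmax_agg_inside)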