def forward(self, user_feat, item_feat, rating_feat):
        r_emb = rating_feat[self.rating]
        i_emb = item_feat[self.row_idxs]
        u_emb = user_feat[self.col_idxs]

        # original paper formula (2)
        x = t.cat([i_emb, r_emb], dim=1)
        x_ia = self.gv(x)

        weight = self.att(x_ia, u_emb).view(-1, 1)
        value = edge_softmax(self.vu_g, weight)

        self.vu_g.edata['h'] = x_ia * value

        self.vu_g.update_all(message_func=fn.copy_edge(edge='h', out='m'), \
            reduce_func=fn.sum(msg='m', out='n_f'))

        h = self.vu_g.ndata['n_f'][:self.userNum]

        if self.act is None:
            hi = self.w(h)
        else:
            hi = self.act(self.w(h))

        return hi
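
The snippets above and below both rely on edge_softmax to turn raw per-edge attention scores into weights that sum to one over the incoming edges of each destination node. A minimal, self-contained sketch of that behaviour on a toy graph (graph, scores and shapes here are invented for illustration):

import dgl
import torch
from dgl.ops import edge_softmax  # also exposed as dgl.nn.functional.edge_softmax

g = dgl.graph(([0, 1, 2], [1, 1, 2]))          # two edges into node 1, one into node 2
scores = torch.tensor([[1.0], [2.0], [0.5]])   # one score per edge, shape (E, 1)
alpha = edge_softmax(g, scores)                # normalized over the in-edges of each dst node
# alpha[0] + alpha[1] == 1 (edges into node 1); alpha[2] == 1 (only edge into node 2)
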
Example #2
    def forward(self, user_feat, item_feat, rating_feat):
        r_emb = rating_feat[self.rating]
        u_emb = user_feat[self.row_idxs]
        i_emb = item_feat[self.col_idxs]

        # original paper formula (15)
        x = t.cat([u_emb, r_emb], dim=1)
        f_jt = self.gu(x)

        # f_jt = F.relu(self.w_r1(t.cat([u_emb, r_emb], dim=1)))
        weight = self.att(f_jt, i_emb).view(-1, 1)
        value = edge_softmax(self.uv_g, weight)

        self.uv_g.edata['h'] = f_jt * value

        self.uv_g.update_all(message_func=fn.copy_edge(edge='h', out='m'), \
            reduce_func=fn.sum(msg='m', out='n_f'))

        z = self.uv_g.ndata['n_f'][self.userNum:]

        if self.act is None:
            z = self.w(z)
        else:
            z = self.act(self.w(z))
        return z
Example #3
    def forward(self, graph, node_feat, edge_feat):
        with graph.local_scope():
            h_src = h_dst = node_feat
            feat_src = feat_dst = self.fc(h_src).view(-1, self._edata_channels, self._out_feats)
            e_feat = self.edge_fc(edge_feat).view(-1, self._edata_channels, 1)
            graph.edata.update({'feat': e_feat})
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({'feat': feat_src, 'el': el})
            graph.dstdata.update({'er': er})

            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = graph.edata.pop('e') * e_feat
            e = self.leaky_relu(e)

            # compute softmax
            graph.edata['a'] = edge_softmax(graph, e)

            # message passing
            def message_func(edges):
                feat_with_e = th.cat([edges.src['feat'], edges.data['feat']], 2)
                # apply an fc layer to project the node feature concatenated with E_p to out_feat_dim
                feat_with_e = self.nfeat_with_e_fc(feat_with_e)
                return {'m': edges.data['a'] * feat_with_e}

            graph.update_all(message_func,
                             fn.sum('m', 'ft'))
            rst = graph.dstdata['ft']
            rst = th.sigmoid(rst)
            return rst
Example #4
    def forward(self, g, feat):
        """
        :param g: DGLGraph, bipartite graph (containing a single relation)
        :param feat: tensor(N_src, d_in) or (tensor(N_src, d_in), tensor(N_dst, d_in)), input features
        :return: tensor(N_dst, K*d_out), representation of the destination vertices for this relation
        """
        with g.local_scope():
            feat_src, feat_dst = expand_as_pair(feat, g)
            feat_src = self.fc_src(self.feat_drop(feat_src)).view(-1, self.num_heads, self.out_dim)
            feat_dst = self.fc_dst(self.feat_drop(feat_dst)).view(-1, self.num_heads, self.out_dim)

            # a^T (z_u || z_v) = (a_l^T || a_r^T) (z_u || z_v) = a_l^T z_u + a_r^T z_v = el + er
            el = (feat_src * self.attn_src[:, :self.out_dim]).sum(dim=-1, keepdim=True)  # (N_src, K, 1)
            er = (feat_dst * self.attn_src[:, self.out_dim:]).sum(dim=-1, keepdim=True)  # (N_dst, K, 1)
            g.srcdata.update({'ft': feat_src, 'el': el})
            g.dstdata['er'] = er
            g.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = self.leaky_relu(g.edata.pop('e'))
            g.edata['a'] = edge_softmax(g, e)  # (E, K, 1)

            # message passing
            g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            ret = g.dstdata['ft'].view(-1, self.num_heads * self.out_dim)
            if self.activation:
                ret = self.activation(ret)
            return ret
Example #5
def calc_author_citation(g):
    """使用论文引用数加权求和计算学者引用数

    :param g: DGLGraph 学者-论文二分图
    :return: tensor(N_author) 学者引用数
    """
    import dgl.function as fn
    from dgl.ops import edge_softmax
    with g.local_scope():
        # the k-th author gets weight 1/k; the last author is treated as the corresponding author with weight 1/2
        g.edges['writes'].data['w'] = 1.0 / g.edges['writes'].data['order']
        g.update_all(fn.copy_e('w', 'w'), fn.min('w', 'mw'), etype='writes')
        g.apply_edges(fn.copy_u('mw', 'mw'), etype='writes_rev')
        w, mw = g.edges['writes'].data.pop(
            'w'), g.edges['writes_rev'].data.pop('mw')
        w[w == mw] = 0.5

        # normalize the author weights within each paper, then take the weighted sum of citation counts over each author's papers
        p = edge_softmax(g['author', 'writes', 'paper'],
                         torch.log(w).unsqueeze(dim=1))
        g.edges['writes_rev'].data['p'] = p.squeeze(dim=1)
        g.update_all(fn.u_mul_e('citation', 'p', 'c'),
                     fn.sum('c', 'c'),
                     etype='writes_rev')
        return g.nodes['author'].data['c']
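
A side note on the log trick above: because softmax(log w) = w / Σ w, passing torch.log(w) through edge_softmax simply normalizes the raw author weights over the edges entering each paper. A tiny sketch of that identity (toy homogeneous graph instead of the author-paper heterograph):

import dgl
import torch
from dgl.ops import edge_softmax

g = dgl.graph(([0, 1, 2], [2, 2, 2]))            # three edges into node 2
w = torch.tensor([[1.0], [1.0], [2.0]])          # raw per-edge weights, shape (E, 1)
p = edge_softmax(g, torch.log(w))                # -> [[0.25], [0.25], [0.50]]
assert torch.allclose(p, w / w.sum())
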
Example #6
    def forward(self, g, feat_src, feat_dst):
        """
        :param g: DGLGraph, neighbor-to-target bipartite graph
        :param feat_src: tensor(N_src, d), input features of the neighbor vertices
        :param feat_dst: tensor(N_dst, d), input features of the target vertices
        :return: tensor(N_dst, d), output features of the target vertices
        """
        with g.local_scope():
            # The HeCo authors' code applies attn_drop differently from the original GAT; this is incorrect, yet it improves node-clustering performance...
            attn_l = self.attn_drop(self.attn_l)
            attn_r = self.attn_drop(self.attn_r)
            el = (feat_src * attn_l).sum(dim=-1).unsqueeze(dim=-1)  # (N_src, 1)
            er = (feat_dst * attn_r).sum(dim=-1).unsqueeze(dim=-1)  # (N_dst, 1)
            g.srcdata.update({'ft': feat_src, 'el': el})
            g.dstdata['er'] = er
            g.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = self.leaky_relu(g.edata.pop('e'))
            g.edata['a'] = edge_softmax(g, e)  # (E, 1)

            # message passing
            g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            ret = g.dstdata['ft']
            if self.activation:
                ret = self.activation(ret)
            return ret
Example #7
    def forward(self, graph, memory, ts):
        graph = graph.local_var()  # Using local scope for graph
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError(
                    'There are 0-in-degree nodes in the graph, '
                    'output for those nodes will be invalid. '
                    'This is harmful for some applications, '
                    'causing silent performance regression. '
                    'Adding self-loop on the input graph by '
                    'calling `g = dgl.add_self_loop(g)` will resolve '
                    'the issue. Setting ``allow_zero_in_degree`` '
                    'to be `True` when constructing this module will '
                    'suppress the check and let the code run.')

        #print("Shape: ",memory.shape,ts.shape)
        graph.srcdata.update({'s': memory, 'timestamp': ts})
        graph.dstdata.update({'s': memory, 'timestamp': ts})

        # Dot product to calculate the attention weight
        graph.apply_edges(self.weight_fn)

        # Edge softmax
        graph.edata['sa'] = edge_softmax(
            graph, graph.edata['a']) / (self._out_feats**0.5)

        # Update dst nodes; here msg_fn includes the edge feature
        graph.update_all(self.msg_fn, fn.sum('attn', 'agg_u'))

        rst = graph.dstdata['agg_u']
        # Implement skip connection
        rst = self.merge(rst.view(-1, self._num_heads * self._out_feats),
                         graph.dstdata['s'])
        return rst
Example #8
    def forward(self, graph, feat):
        '''

        :param graph: DGLGraph
        :param feat: <N, b, F>
        :return:
        '''
        with graph.local_scope():
            N, b, _ = feat.size()
            graph = graph.local_var()
            graph = graph.to(feat.device)
            feat = torch.cat([self.fc1(feat[:get_Parameter('taxi_size')]), self.fc2(feat[get_Parameter('taxi_size'):])], dim=0)
            feat_src = feat_dst = feat.view(N, b, self._num_heads, self._out_feats)
            #feat_src = feat_dst = self.fc(feat).view(N, b, self._num_heads, self._out_feats)
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_l).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})

            graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
            #graph.apply_edges(fn.u_mul_e('e', 'w', 'e'))
            e = self.leaky_relu(graph.edata.pop('e'))
            graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
            #print(graph.edata['a'].size())
            graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            rst = graph.dstdata['ft']
            rst = rst.reshape(N, -1, self._num_heads*self._out_feats)
            return rst, graph.edata['a']
Example #9
def test_edge_softmax(g, norm_by, shp, idtype):
    g = g.astype(idtype).to(F.ctx())
    edata = F.tensor(np.random.rand(g.number_of_edges(), *shp))
    e1 = F.attach_grad(F.clone(edata))

    with F.record_grad():
        score1 = edge_softmax(g, e1, norm_by=norm_by)
        F.backward(F.reduce_sum(score1))
        grad_edata = F.grad(e1)

    with F.record_grad():
        e2 = F.attach_grad(F.clone(edata))
        e2_2d = F.reshape(
            e2,
            (g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:]))
        if norm_by == 'src':
            score2 = F.softmax(e2_2d, 1)
            score2 = F.reshape(score2, (-1, *e2.shape[1:]))
        if norm_by == 'dst':
            score2 = F.softmax(e2_2d, 0)
            score2 = F.reshape(score2, (-1, *e2.shape[1:]))
        assert F.allclose(score1, score2)
        print('forward passed')

        F.backward(F.reduce_sum(score2))
        assert F.allclose(F.grad(e2), grad_edata)
        print('backward passed')
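
The test above checks edge_softmax against a dense softmax for both norm_by options. A smaller PyTorch-only sketch of the same property (toy graph, invented scores):

import dgl
import torch
from dgl.ops import edge_softmax

g = dgl.graph(([0, 0, 1], [1, 2, 2]))       # edges 0 and 1 leave node 0
e = torch.rand(g.num_edges(), 1)
a_dst = edge_softmax(g, e)                  # default: normalize over the in-edges of each dst
a_src = edge_softmax(g, e, norm_by='src')   # normalize over the out-edges of each src
assert torch.allclose(a_src[0] + a_src[1], torch.ones(1))  # edges leaving node 0 sum to 1
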
Example #10
    def forward(self, g, feat):
        with g.local_scope():
            if self.aggre_type == 'attention':
                if isinstance(feat, tuple):
                    h_src = self.feat_drop(feat[0]).view(
                        -1, self.num_heads, self.in_size)
                    h_dst = self.feat_drop(feat[1]).view(
                        -1, self.num_heads, self.in_size)
                el = (h_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
                g.srcdata.update({'ft': h_src, 'el': el})
                g.apply_edges(fn.copy_u('el', 'e'))
                e = self.leaky_relu(g.edata.pop('e'))
                g.edata['a'] = self.attn_drop(edge_softmax(g, e))
                g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
                rst = g.dstdata['ft'].flatten(1)
                if self.residual:
                    rst = rst + h_dst
                if self.activation:
                    rst = self.activation(rst)

            elif self.aggre_type == 'mean':
                h_src = self.feat_drop(feat[0]).view(
                    -1, self.in_size * self.num_heads)
                g.srcdata['ft'] = h_src
                g.update_all(fn.copy_u('ft', 'm'), fn.mean('m', 'ft'))
                rst = g.dstdata['ft']

            elif self.aggre_type == 'pool':
                h_src = self.feat_drop(feat[0]).view(
                    -1, self.in_size * self.num_heads)
                g.srcdata['ft'] = F.relu(self.fc_pool(h_src))
                g.update_all(fn.copy_u('ft', 'm'), fn.mean('m', 'ft'))
                rst = g.dstdata['ft']
            return rst
Example #11
    def forward(self, g, feat):
        """
        :param g: DGLGraph, bipartite graph (containing a single relation)
        :param feat: tensor(N_src, d_in) or (tensor(N_src, d_in), tensor(N_dst, d_in)), input features
        :return: tensor(N_dst, d_out), representation of the destination vertices for this relation
        """
        with g.local_scope():
            feat_src, feat_dst = expand_as_pair(feat, g)
            # (N_src, d_in) -> (N_src, d_out) -> (N_src, K, d_out/K)
            k = self.k_linear(feat_src).view(-1, self.num_heads, self.d_k)
            v = self.v_linear(feat_src).view(-1, self.num_heads, self.d_k)
            q = self.q_linear(feat_dst).view(-1, self.num_heads, self.d_k)

            # k[:, h] @= w_att[h] => k[n, h, j] = ∑(i) k[n, h, i] * w_att[h, i, j]
            k = torch.einsum('nhi,hij->nhj', k, self.w_att)
            v = torch.einsum('nhi,hij->nhj', v, self.w_msg)

            g.srcdata.update({'k': k, 'v': v})
            g.dstdata['q'] = q
            g.apply_edges(fn.v_dot_u('q', 'k', 't'))  # g.edata['t']: (E, K, 1)
            attn = g.edata.pop('t').squeeze(dim=-1) * self.mu / math.sqrt(
                self.d_k)
            attn = edge_softmax(g, attn)  # (E, K)
            self.attn = attn.detach()
            g.edata['t'] = attn.unsqueeze(dim=-1)  # (E, K, 1)

            g.update_all(fn.u_mul_e('v', 't', 'm'), fn.sum('m', 'h'))
            out = g.dstdata['h'].view(-1, self.out_dim)  # (N_dst, d_out)
            return out
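
A shape-only sketch (toy sizes, random tensors) of the einsum contract 'nhi,hij->nhj' used above: each head h gets its own (d_k, d_k) matrix applied to its keys and messages:

import torch

N, H, d_k = 5, 4, 8
k = torch.randn(N, H, d_k)            # per-node, per-head keys (hypothetical sizes)
w_att = torch.randn(H, d_k, d_k)      # one square matrix per head
out = torch.einsum('nhi,hij->nhj', k, w_att)                       # (N, H, d_k)
ref = torch.stack([k[:, h] @ w_att[h] for h in range(H)], dim=1)   # same thing, head by head
assert torch.allclose(out, ref, atol=1e-5)
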
    def forward(self, g, node_feat, edge_feat):
        """
        :param g: DGLGraph, graph built from metapath-based neighbors; each edge represents one metapath instance
        :param node_feat: tensor(N, d_in), input vertex features, where N is the number of destination vertices of g
        :param edge_feat: tensor(E, L, d_in), metapath-instance features (composed of the features of the intermediate vertices), where E is the number of edges of g and L is the metapath length
        :return: tensor(N, K, d_out), output vertex features, where K is the number of attention heads
        """
        # Unlike node-level attention in GAT/HAN, attention here is over metapath instances rather than metapath-based neighbors, so the intermediate vertices of each instance are taken into account
        with g.local_scope():
            edge_feat = self.encoder(edge_feat)  # (E, L, d_in) -> (E, K*d_out)
            edge_feat = edge_feat.view(-1, self.num_heads,
                                       self.out_dim)  # (E, K, d_out)
            # a^T (h_p || h_v) = (a_l^T || a_r^T) (h_p || h_v) = a_l^T h_p + a_r^T h_v = el + er
            el = (edge_feat * self.attn_l).sum(dim=-1).unsqueeze(
                dim=-1)  # (E, K, 1)
            er = self.attn_r(node_feat).unsqueeze(dim=-1)  # (N, K, 1)
            g.edata.update({'ft': edge_feat, 'el': el})
            g.dstdata['er'] = er
            g.apply_edges(fn.e_add_v('el', 'er', 'e'))
            e = self.leaky_relu(g.edata.pop('e'))
            g.edata['a'] = self.attn_drop(edge_softmax(g, e))  # (E, K, 1)

            # message passing
            g.update_all(
                lambda edges: {'m': edges.data['ft'] * edges.data['a']},
                fn.sum('m', 'ft'))
            ret = g.dstdata['ft']
            if self.activation:
                ret = self.activation(ret)
            return ret
Example #13
    def forward(self, g, feat):
        """
        :param g: DGLGraph, homogeneous graph
        :param feat: tensor(N_src, d_in), input vertex features
        :return: tensor(N_dst, K, d_out), output vertex features
        """
        with g.local_scope():
            feat_src = self.fc(self.feat_drop(feat)).view(
                -1, self.num_heads, self.out_dim)
            feat_dst = feat_src[:g.num_dst_nodes()] if g.is_block else feat_src
            e = self.leaky_relu(self.attn(g, feat_src, feat_dst))  # (E, K, 1)
            g.edata['a'] = self.attn_drop(edge_softmax(g, e))  # (E, K, 1)
            g.srcdata['ft'] = feat_src
            # message passing
            g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            out = g.dstdata['ft']  # (N_dst, K, d_out)

            if self.training:
                # negative sampling
                neg_g = dgl.graph(self.neg_sampler(g,
                                                   list(range(g.num_edges()))),
                                  num_nodes=g.num_nodes(),
                                  device=g.device)
                neg_e = self.attn(neg_g, feat_src, feat_src)  # (E', K, 1)
                self.attn_x = torch.cat([e, neg_e]).squeeze(dim=-1).mean(
                    dim=1)  # (E+E',)
                self.attn_y = torch.cat([torch.ones(e.shape[0]), torch.zeros(neg_e.shape[0])]) \
                    .to(self.attn_x.device)

            if self.activation:
                out = self.activation(out)
            return out
Example #14
    def forward(
            self,
            value: Union[Tensor, Dict[str,
                                      Tensor]],  # edge features (may be fused)
            key: Union[Tensor, Dict[str,
                                    Tensor]],  # edge features (may be fused)
            query: Dict[str, Tensor],  # node features
            graph: DGLGraph):
        with nvtx_range('AttentionSE3'):
            with nvtx_range('reshape keys and queries'):
                if isinstance(key, Tensor):
                    # case where features of all types are fused
                    key = key.reshape(key.shape[0], self.num_heads, -1)
                    # need to reshape queries that way to keep the same layout as keys
                    out = torch.cat(
                        [query[str(d)] for d in self.key_fiber.degrees],
                        dim=-1)
                    query = out.reshape(
                        list(query.values())[0].shape[0], self.num_heads, -1)
                else:
                    # features are not fused, need to fuse and reshape them
                    key = self.key_fiber.to_attention_heads(
                        key, self.num_heads)
                    query = self.key_fiber.to_attention_heads(
                        query, self.num_heads)

            with nvtx_range('attention dot product + softmax'):
                # Compute attention weights (softmax of inner product between key and query)
                edge_weights = dgl.ops.e_dot_v(graph, key, query).squeeze(-1)
                edge_weights = edge_weights / np.sqrt(
                    self.key_fiber.num_features)
                edge_weights = edge_softmax(graph, edge_weights)
                edge_weights = edge_weights[..., None, None]

            with nvtx_range('weighted sum'):
                if isinstance(value, Tensor):
                    # features of all types are fused
                    v = value.view(value.shape[0], self.num_heads, -1,
                                   value.shape[-1])
                    weights = edge_weights * v
                    feat_out = dgl.ops.copy_e_sum(graph, weights)
                    feat_out = feat_out.view(feat_out.shape[0], -1,
                                             feat_out.shape[-1])  # merge heads
                    out = unfuse_features(feat_out, self.value_fiber.degrees)
                else:
                    out = {}
                    for degree, channels in self.value_fiber:
                        v = value[str(degree)].view(-1, self.num_heads,
                                                    channels // self.num_heads,
                                                    degree_to_dim(degree))
                        weights = edge_weights * v
                        res = dgl.ops.copy_e_sum(graph, weights)
                        out[str(degree)] = res.view(
                            -1, channels, degree_to_dim(degree))  # merge heads

                return out
Example #15
    def forward(self, G, h):
        with G.local_scope():
            node_dict, edge_dict = self.node_dict, self.edge_dict
            for srctype, etype, dsttype in G.canonical_etypes:
                sub_graph = G[srctype, etype, dsttype]

                k_linear = self.k_linears[node_dict[srctype]]
                v_linear = self.v_linears[node_dict[srctype]]
                q_linear = self.q_linears[node_dict[dsttype]]

                k = k_linear(h[srctype]).view(-1, self.n_heads, self.d_k)
                v = v_linear(h[srctype]).view(-1, self.n_heads, self.d_k)
                q = q_linear(h[dsttype]).view(-1, self.n_heads, self.d_k)

                e_id = self.edge_dict[etype]

                relation_att = self.relation_att[e_id]
                relation_pri = self.relation_pri[e_id]
                relation_msg = self.relation_msg[e_id]

                k = torch.einsum("bij,ijk->bik", k, relation_att)
                v = torch.einsum("bij,ijk->bik", v, relation_msg)

                sub_graph.srcdata['k'] = k
                sub_graph.dstdata['q'] = q
                sub_graph.srcdata['v'] = v

                sub_graph.apply_edges(fn.v_dot_u('q', 'k', 't'))
                attn_score = sub_graph.edata.pop('t').sum(
                    -1) * relation_pri / self.sqrt_dk
                attn_score = edge_softmax(sub_graph, attn_score, norm_by='dst')

                sub_graph.edata['t'] = attn_score.unsqueeze(-1)

            G.multi_update_all({etype : (fn.u_mul_e('v', 't', 'm'), fn.sum('m', 't')) \
                                for etype in edge_dict}, cross_reducer = 'mean')

            new_h = {}
            for ntype in G.ntypes:
                '''
                    Step 3: Target-specific Aggregation
                    x = norm( W[node_type] * gelu( Agg(x) ) + x )
                '''
                n_id = node_dict[ntype]
                alpha = torch.sigmoid(self.skip[n_id])
                t = G.nodes[ntype].data['t'].view(-1, self.out_dim)
                trans_out = self.drop(self.a_linears[n_id](t))
                trans_out = trans_out * alpha + h[ntype] * (1 - alpha)
                if self.use_norm:
                    new_h[ntype] = self.norms[n_id](trans_out)
                else:
                    new_h[ntype] = trans_out
            return new_h
Example #16
 def forward(self, g, feat_src, feat_dst):
     if self.batch_norm_q is not None:
         feat_src = self.batch_norm_q(feat_src)
         feat_dst = self.batch_norm_k(feat_dst)
     if self.feat_drop is not None:
         feat_src = self.feat_drop(feat_src)
         feat_dst = self.feat_drop(feat_dst)
     score = F.u_dot_v(g, feat_src, feat_dst)  # (num_edges, 1)
     weight = F.edge_softmax(g, score)
     rst = F.u_mul_e_sum(g, feat_src, weight)
     rst = th.relu(self.fc(rst))
     return rst
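
The snippet above appears to alias the DGL operator namespace as F (e.g. import dgl.ops as F). Assuming that, a minimal sketch of the three fused operators it chains: u_dot_v scores each edge, edge_softmax normalizes per destination, and u_mul_e_sum aggregates the weighted source features:

import dgl
import dgl.ops as F
import torch

g = dgl.graph(([0, 1, 2], [2, 2, 2]))
feat = torch.randn(3, 8)
score = F.u_dot_v(g, feat, feat)         # (E, 1) dot-product score per edge
alpha = F.edge_softmax(g, score)         # (E, 1) attention weight per destination
readout = F.u_mul_e_sum(g, feat, alpha)  # (N, 8) weighted sum of source features per node
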
    def forward(self, g, feats):
        """
        :param g: DGLGraph, heterogeneous graph
        :param feats: Dict[str, tensor(N_i, d_in)], mapping from vertex type to input vertex features
        :return: Dict[str, tensor(N_i, d_out)], mapping from vertex type to output features
        """
        with g.local_scope():
            for stype, etype, dtype in g.canonical_etypes:
                sg = g[stype, etype, dtype]
                feat_src, feat_dst = feats[stype], feats[dtype]

                # (N_i, d_in) -> (N_i, d_out) -> (N_i, K, d_out/K)
                k = self.k_linears[stype](feat_src).view(
                    -1, self.num_heads, self.d_k)
                v = self.v_linears[stype](feat_src).view(
                    -1, self.num_heads, self.d_k)
                q = self.q_linears[dtype](feat_dst).view(
                    -1, self.num_heads, self.d_k)

                # k[:, h] @= w_att[h] => k[n, h, j] = ∑(i) k[n, h, i] * w_att[h, i, j]
                k = torch.einsum('nhi,hij->nhj', k, self.w_att[etype])
                v = torch.einsum('nhi,hij->nhj', v, self.w_msg[etype])

                sg.srcdata.update({'k': k, f'v_{etype}': v})
                sg.dstdata['q'] = q

                # Step 1: heterogeneous mutual attention
                sg.apply_edges(fn.v_dot_u('q', 'k',
                                          't'))  # sg.edata['t']: (E, K, 1)
                attn = sg.edata.pop('t').squeeze(
                    dim=-1) * self.mu[etype] / math.sqrt(self.d_k)
                attn = edge_softmax(sg, attn)  # (E, K)
                sg.edata['t'] = attn.unsqueeze(dim=-1)

            # Step 2: heterogeneous message passing + target-specific aggregation
            g.multi_update_all(
                {
                    etype:
                    (fn.u_mul_e(f'v_{etype}', 't', 'm'), fn.sum('m', 'h'))
                    for etype in g.etypes
                }, 'mean')

            # Step 3: residual connection
            out_feats = {}
            for ntype in g.ntypes:
                alpha = torch.sigmoid(self.skip[ntype])
                h = g.nodes[ntype].data['h'].view(-1, self.out_dim)
                trans_out = self.drop(self.a_linears[ntype](h))
                out = alpha * trans_out + (1 - alpha) * feats[ntype]
                out_feats[ntype] = self.norms[ntype](
                    out) if self.use_norm else out
            return out_feats
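
A minimal sketch (toy heterograph with made-up node and edge types) of multi_update_all as used above: each relation runs its own message/reduce pair, and per-relation results landing on the same destination node type are combined by the cross_reducer, 'mean' here:

import dgl
import dgl.function as fn
import torch

g = dgl.heterograph({
    ('user', 'likes', 'item'): ([0, 1], [0, 0]),
    ('shop', 'sells', 'item'): ([0], [0]),
})
g.nodes['user'].data['h'] = torch.ones(2, 4)
g.nodes['shop'].data['h'] = 3 * torch.ones(1, 4)
g.multi_update_all(
    {'likes': (fn.copy_u('h', 'm'), fn.sum('m', 'h_agg')),
     'sells': (fn.copy_u('h', 'm'), fn.sum('m', 'h_agg'))},
    'mean')
# item 0: mean(sum over 'likes' = 2, sum over 'sells' = 3) = 2.5 per feature
print(g.nodes['item'].data['h_agg'])
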
Example #18
 def forward(self, sg, feat):
     if self.batch_norm is not None:
         feat = self.batch_norm(feat)
     feat = self.feat_drop(feat)
     q = self.fc_q(feat)
     k = self.fc_k(feat)
     v = self.fc_v(feat)
     e = F.u_add_v(sg, q, k)
     e = self.fc_e(th.sigmoid(e))
     a = F.edge_softmax(sg, e)
     rst = F.u_mul_e_sum(sg, v, a)
     if self.activation is not None:
         rst = self.activation(rst)
     return rst
Example #19
 def forward(self, graph, feat, soft_label):
     with graph.local_scope():
         if not self._allow_zero_in_degree:
             if (graph.in_degrees() == 0).any():
                 raise DGLError('There are 0-in-degree nodes in the graph, '
                                'output for those nodes will be invalid. '
                                'This is harmful for some applications, '
                                'causing silent performance regression. '
                                'Adding self-loop on the input graph by '
                                'calling `g = dgl.add_self_loop(g)` will resolve '
                                'the issue. Setting ``allow_zero_in_degree`` '
                                'to be `True` when constructing this module will '
                                'suppress the check and let the code run.')
         if self.ptype == 'ind':
             feat_src = h_dst = self.feat_drop(feat)
             el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
             er = th.zeros(graph.num_nodes(), device=graph.device)
         elif self.ptype == 'tra':
             feat_src = self.feat_drop(self.fc_emb)
             feat_dst = h_dst = th.zeros(graph.num_nodes(), device=graph.device)
             el = feat_src
             er = feat_dst
         cog_label = soft_label
         graph.srcdata.update({'ft': cog_label, 'el': el})
         graph.dstdata.update({'er': er})
         # # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
         graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
         # graph.edata['e'] = th.ones(graph.num_edges(), device=graph.device)  # non-parameterized PLP
         e = graph.edata.pop('e')
         # compute softmax
         graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
         att = graph.edata['a'].squeeze()
         # message passing
         graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
                          fn.sum('m', 'ft'))
         if self.mlp_layers > 0:
             rst = th.sigmoid(self.lr_alpha) * graph.dstdata['ft'] + \
                   th.sigmoid(-self.lr_alpha) * self.mlp(feat)
         else:
             rst = graph.dstdata['ft']
         # residual
         if self.res_fc is not None:
             resval = self.res_fc(h_dst)
             rst = rst + resval
         # activation
         if self.activation:
             rst = self.activation(rst)
         return rst, att, th.sigmoid(self.lr_alpha).squeeze(), el.squeeze(), er.squeeze()
Example #20
    def forward(self, user_feat, hi):
        trust_emb = user_feat[self.row_idxs]

        trustee_emb = hi[self.col_idxs]

        weight = self.att(trust_emb, trustee_emb).view(-1, 1)

        # value = edge_softmax(self.uu_g, weight, norm_by='src').view(-1)
        value = edge_softmax(self.uu_g, weight).view(-1)

        A = t.sparse.FloatTensor(self.idxs, value, self.shape).detach()
        A = A.transpose(0, 1)

        if self.act is None:
            hs = self.w(t.spmm(A, hi))
        else:
            hs = self.act(self.w(t.spmm(A, hi)))
        return hs
Example #21
    def forward(self, graph, feat, soft_label):
        graph = graph.local_var()

        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')
        h_src = feat
        feat_src = feat_dst = self.fc(h_src)
        if graph.is_block:
            feat_dst = feat_src[:graph.number_of_dst_nodes()]

        # Assign features to nodes
        graph.srcdata.update({'ft': feat_src})
        graph.dstdata.update({'ft': feat_dst})
        # Step 1. dot product
        graph.apply_edges(fn.u_dot_v('ft', 'ft', 'a'))
        # graph.edata['a'] = th.ones(graph.num_edges(), device=graph.device)
        # Step 2. edge softmax to compute attention scores
        graph.edata['sa'] = edge_softmax(graph, graph.edata['a'])
        att = graph.edata['sa'].squeeze()
        cog_label = soft_label
        # cog_label = self.fc2(feat)
        # cog_label = th.sigmoid(self.lr_alpha) * soft_label + th.sigmoid(-self.lr_alpha) * self.fc2(feat)
        graph.srcdata.update({'ft': cog_label})
        graph.dstdata.update({'ft': cog_label})
        # Step 3. Broadcast softmax value to each edge, and aggregate dst node
        graph.update_all(fn.u_mul_e('ft', 'sa', 'attn'), fn.sum('attn', 'agg_u'))
        # output results to the destination nodes
        rst = graph.dstdata['agg_u']

        return rst, att, th.sigmoid(self.lr_alpha).squeeze()
Example #22
    def forward(self, graph, feat, attn_feat):
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    raise DGLError(
                        'There are 0-in-degree nodes in the graph, '
                        'output for those nodes will be invalid. '
                        'This is harmful for some applications, '
                        'causing silent performance regression. '
                        'Adding self-loop on the input graph by '
                        'calling `g = dgl.add_self_loop(g)` will resolve '
                        'the issue. Setting ``allow_zero_in_degree`` '
                        'to be `True` when constructing this module will '
                        'suppress the check and let the code run.')

            h_src = self.feat_drop(feat)
            attn_h_src = self.feat_drop(attn_feat)
            feat_src = self.fc(h_src).view(-1, self._num_heads,
                                           self._out_feats)
            attn_feat_src = attn_feat_dst = self.fc_attn(attn_h_src).view(
                -1, self._num_heads, self._out_feats)
            if graph.is_block:
                attn_feat_dst = attn_feat_src[:graph.number_of_dst_nodes()]
            el = (attn_feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (attn_feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(dgl.function.u_add_v('el', 'er', 'e'))
            e = self.leaky_relu(graph.edata.pop('e'))

            # compute softmax
            graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
            # message passing
            graph.update_all(dgl.function.u_mul_e('ft', 'a', 'm'),
                             dgl.function.sum('m', 'ft'))
            rst = graph.dstdata['ft']
            return rst
Example #23
    def forward(self, graph, feat):
        with graph.local_scope():
            h_src = h_dst = self.feat_drop(feat).view(-1, self._num_heads, self.in_size)

            el = (h_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (h_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({'ft': h_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = self.leaky_relu(graph.edata.pop('e'))
            # compute softmax
            graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
            # message passing
            graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            rst = graph.dstdata['ft']
            # residual
            if self.residual:
                rst = rst + h_dst
            # activation
            if self.activation:
                rst = self.activation(rst)
            return rst
Example #24
    def forward(self, graph, feat):
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    assert False

            if isinstance(feat, tuple):
                h_src = self.feat_drop(feat[0])
                h_dst = self.feat_drop(feat[1])
                if not hasattr(self, "fc_src"):
                    self.fc_src, self.fc_dst = self.fc, self.fc
                feat_src, feat_dst = h_src, h_dst
                feat_src = self.fc_src(h_src).view(-1, self._num_heads,
                                                   self._out_feats)
                feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads,
                                                   self._out_feats)
            else:
                h_src = h_dst = self.feat_drop(feat)
                feat_src, feat_dst = h_src, h_dst
                feat_src = feat_dst = self.fc(h_src).view(
                    -1, self._num_heads, self._out_feats)
                if graph.is_block:
                    feat_dst = feat_src[:graph.number_of_dst_nodes()]

            if self._norm == "both":
                degs = graph.out_degrees().float().clamp(min=1)
                norm = torch.pow(degs, -0.5)
                shp = norm.shape + (1, ) * (feat_src.dim() - 1)
                norm = torch.reshape(norm, shp)
                feat_src = feat_src * norm

            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is much more efficient because we do not need to
            # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
            # addition could be optimized with DGL's built-in function u_add_v,
            # which further speeds up computation and saves memory footprint.
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({"ft": feat_src, "el": el})
            graph.dstdata.update({"er": er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v("el", "er", "e"))
            e = self.leaky_relu(graph.edata.pop("e"))
            # compute softmax
            graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))
            # message passing
            graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
            rst = graph.dstdata["ft"]

            if self._norm == "both":
                degs = graph.in_degrees().float().clamp(min=1)
                norm = torch.pow(degs, 0.5)
                shp = norm.shape + (1, ) * (feat_dst.dim() - 1)
                norm = torch.reshape(norm, shp)
                rst = rst * norm

            # residual
            if self.res_fc is not None:
                resval = self.res_fc(h_dst).view(h_dst.shape[0], -1,
                                                 self._out_feats)
                rst = rst + resval
            # activation
            if self._activation is not None:
                rst = self._activation(rst)
            return rst
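
A quick numeric check (random toy tensors, single head) of the equivalence stated in the NOTE comment above: splitting the attention vector a into [a_l || a_r] turns the "concatenate then project" score into a sum of two per-node terms:

import torch

d = 16
wh_i, wh_j = torch.randn(d), torch.randn(d)   # projected features of nodes i and j
a = torch.randn(2 * d)                        # attention vector a = [a_l || a_r]
a_l, a_r = a[:d], a[d:]
concat_score = a @ torch.cat([wh_i, wh_j])    # "concatenate then project"
split_score = a_l @ wh_i + a_r @ wh_j         # "project then add"
assert torch.allclose(concat_score, split_score, atol=1e-5)
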
Example #25
    def forward(self, graph, feat):
        r"""Compute graph attention network layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two tensors of shape
            :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
            is the number of heads, and :math:`D_{out}` is size of output feature.
        """
        graph = graph.local_var()
        if isinstance(feat, tuple):
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])
            feat_src = self.fc_src(h_src).view(-1, self._num_heads,
                                               self._out_feats)
            feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads,
                                               self._out_feats)
        else:
            h_src = h_dst = self.feat_drop(feat)
            feat_src = feat_dst = self.fc(h_src).view(-1, self._num_heads,
                                                      self._out_feats)

        if self.opt['att_type'] == "GAT":
            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is much more efficient because we do not need to
            # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
            # addition could be optimized with DGL's built-in function u_add_v,
            # which further speeds up computation and saves memory footprint.
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = self.leaky_relu(graph.edata.pop('e'))
        elif self.opt['att_type'] == "cosine":
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            graph.srcdata['norm_h'] = F.normalize(el, p=2, dim=-1)
            graph.dstdata['norm_h'] = F.normalize(er, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            e = graph.edata.pop('cos')
        elif self.opt['att_type'] == "scaled_dot":
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r / th.sqrt(
                th.tensor(self.opt['num_hidden'] / self.opt['num_heads']))
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute dot
            graph.apply_edges(fn.u_dot_v('el', 'er', 'dot'))
            e = graph.edata.pop('dot')
        elif self.opt['att_type'] == "pearson":
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            src_mu = th.mean(el, dim=1, keepdim=True)
            graph.srcdata['norm_h'] = F.normalize(el - src_mu, p=2, dim=-1)
            dst_mu = th.mean(er, dim=1, keepdim=True)
            graph.dstdata['norm_h'] = F.normalize(er - dst_mu, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            e = graph.edata.pop('cos')
        elif self.opt['att_type'] == "spearman":
            #todo check all these operations
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})

            el = el.view(-1, self._out_feats)
            er = er.view(-1, self._out_feats)

            el = soft_rank(el, regularization_strength=1.0)
            er = soft_rank(er, regularization_strength=1.0)

            ranked_src = soft_rank(
                1000 *
                F.normalize(el, p=2, dim=-1))  #, regularization_strength=0.1)
            ranked_dst = soft_rank(1000 * F.normalize(er, p=2, dim=-1),
                                   regularization_strength=0.1)
            src_mu = th.mean(ranked_src, dim=1, keepdim=True)
            dst_mu = th.mean(ranked_dst, dim=1, keepdim=True)

            el = F.normalize(ranked_src - src_mu, p=2, dim=-1)
            er = F.normalize(ranked_dst - dst_mu, p=2, dim=-1)
            el = el.view(-1, self._num_heads, self._out_feats)
            er = er.view(-1, self._num_heads, self._out_feats)
            graph.srcdata['norm_h'] = F.normalize(el, p=2, dim=-1)
            graph.dstdata['norm_h'] = F.normalize(er, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            e = graph.edata.pop('cos')

        # compute softmax
        graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
        # message passing
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']

        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], -1,
                                             self._out_feats)
            rst = rst + resval
        # activation
        if self.activation:
            rst = self.activation(rst)
        return rst
Example #26
    def forward(self, graph, feat):
        r"""

        Description
        -----------
        Compute graph attention network layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two tensors of shape
            :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
            is the number of heads, and :math:`D_{out}` is size of output feature.

        Raises
        ------
        DGLError
            If there are 0-in-degree nodes in the input graph, it will raise DGLError
            since no message will be passed to those nodes. This will cause invalid output.
            The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``.
        """
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    raise DGLError(
                        'There are 0-in-degree nodes in the graph, '
                        'output for those nodes will be invalid. '
                        'This is harmful for some applications, '
                        'causing silent performance regression. '
                        'Adding self-loop on the input graph by '
                        'calling `g = dgl.add_self_loop(g)` will resolve '
                        'the issue. Setting ``allow_zero_in_degree`` '
                        'to be `True` when constructing this module will '
                        'suppress the check and let the code run.')

            if isinstance(feat, tuple):
                h_src = self.feat_drop(feat[0])
                h_dst = self.feat_drop(feat[1])
                if not hasattr(self, 'fc_src'):
                    self.fc_src, self.fc_dst = self.fc, self.fc
                feat_src = self.fc_src(h_src).view(-1, self._num_heads,
                                                   self._out_feats)
                feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads,
                                                   self._out_feats)
            else:
                h_src = h_dst = self.feat_drop(feat)
                feat_src = feat_dst = self.fc(h_src).view(
                    -1, self._num_heads, self._out_feats)
                if graph.is_block:
                    feat_dst = feat_src[:graph.number_of_dst_nodes()]
            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is much more efficient because we do not need to
            # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
            # addition could be optimized with DGL's built-in function u_add_v,
            # which further speeds up computation and saves memory footprint.
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = self.leaky_relu(graph.edata.pop('e'))
            # compute softmax
            graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
            # message passing
            graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            rst = graph.dstdata['ft']
            # residual
            rst = rst.flatten(1)
            rst_norm = self.layer_norm(rst)

            if self.res_fc is not None:
                resval = self.res_fc(h_dst).view(h_dst.shape[0], -1,
                                                 self._out_feats).flatten(1)
                rst_norm = self.feat_drop(rst_norm) + resval
            # activation
            rst = self.activation(rst_norm)
            rst = rst_norm + self.feat_drop(rst)
            rst = self.ff_layer_norm(rst)
            return rst
Example #27
    def forward(self, g, ft_src):
        if self.batch_norm is not None:
            ft_src = {
                ntype: self.batch_norm[ntype](ft)
                for ntype, ft in ft_src.items()
            }
        if self.feat_drop is not None:
            ft_src = {
                ntype: self.feat_drop(ft)
                for ntype, ft in ft_src.items()
            }
        device = next(iter(ft_src.values())).device
        ft_dst = {
            vtype: ft_src[vtype][:g.number_of_dst_nodes(vtype)]
            for vtype in g.dsttypes
        }
        feats = {}
        for vtype, eutypes in self.vtype2eutypes.items():
            src_nid = []
            dst_nid = []
            num_utypes_nodes = 0
            src_val = []
            attn_score = []
            for etype, utype in eutypes:
                sg = g[etype]
                ft_e = (self.edge_embedding[etype](sg.edata['cnt'].to(device))
                        if etype in self.edge_embedding else None)
                e, v = self.edge_aggregate[etype](
                    sg,
                    ft_src[utype],
                    ft_dst[vtype],
                    ft_e=ft_e,
                    return_ev=True,
                )
                uid, vid = sg.all_edges(form='uv', order='eid')
                src_nid.append(uid + num_utypes_nodes)
                dst_nid.append(vid)
                num_utypes_nodes += sg.number_of_src_nodes()
                src_val.append(v)
                attn_score.append(e)
            src_nid = th.cat(src_nid, dim=0)
            dst_nid = th.cat(dst_nid, dim=0)
            edge_softmax_g = dgl.heterograph(data_dict={
                ('utypes', 'etypes', 'vtype'): (src_nid, dst_nid)
            },
                                             num_nodes_dict={
                                                 'utypes':
                                                 num_utypes_nodes,
                                                 'vtype':
                                                 g.number_of_dst_nodes(vtype)
                                             },
                                             device=device)
            src_val = th.cat(src_val,
                             dim=0)  # (num_utypes_nodes, num_heads, num_feats)
            attn_score = th.cat(attn_score, dim=0)  # (num_edges, num_heads, 1)
            attn_weight = F.edge_softmax(edge_softmax_g, attn_score)
            agg = F.u_mul_e_sum(edge_softmax_g, src_val, attn_weight)
            agg = agg.view(g.number_of_dst_nodes(vtype), -1)
            feats[vtype] = self.activation[vtype](self.linear_agg[vtype](agg) +
                                                  self.linear_self[vtype]
                                                  (ft_dst[vtype]))

        return feats
Example #28
    def forward(self, graph, feat):
        r"""Compute AGNN layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor
            The input feature of shape :math:`(N, *)` :math:`N` is the
            number of nodes, and :math:`*` could be of any shape.
            If a pair of torch.Tensor is given, the pair must contain two tensors of shape
            :math:`(N_{in}, *)` and :math:`(N_{out}, *)`; the :math:`*` in the latter
            tensor must equal that of the former.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, *)` where :math:`*`
            should be the same as input shape.
        """
        graph = graph.local_var()

        feat_src, feat_dst = expand_as_pair(feat)
        graph.srcdata['h'] = feat_src
        if self.opt['att_type'] == "AGNN":
            graph.srcdata['norm_h'] = F.normalize(feat_src, p=2, dim=-1)
            if isinstance(feat, tuple):
                graph.dstdata['norm_h'] = F.normalize(feat_dst, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            cos = graph.edata.pop('cos')
            e = self.beta * cos
        #SAME AS AGNN
        elif self.opt['att_type'] == "cosine":
            graph.srcdata['norm_h'] = F.normalize(feat_src, p=2, dim=-1)
            if isinstance(feat, tuple):
                graph.dstdata['norm_h'] = F.normalize(feat_dst, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            cos = graph.edata.pop('cos')
            e = self.beta * cos
        elif self.opt['att_type'] == "scaled_dot":
            if isinstance(feat, tuple):
                graph.dstdata['h'] = feat_dst / th.sqrt(th.tensor(self.opt['num_hidden']))
            # compute dot
            graph.apply_edges(fn.u_dot_v('h', 'h', 'dot'))
            dot = graph.edata.pop('dot')
            e = self.beta * dot
        elif self.opt['att_type'] == "pearson":
            src_mu = th.mean(feat_src, dim=1, keepdim=True)
            graph.srcdata['norm_h'] = F.normalize(feat_src - src_mu, p=2, dim=-1)
            if isinstance(feat, tuple):
                dst_mu = th.mean(feat_dst, dim=1, keepdim=True)
                graph.dstdata['norm_h'] = F.normalize(feat_dst - dst_mu, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            cos = graph.edata.pop('cos')
            e = self.beta * cos
        elif self.opt['att_type'] == "spearman":
            # F.normalize(feat_src, p=2, dim=1).detach().numpy()
            ranked_src = soft_rank(1000*F.normalize(feat_src, p=2, dim=-1))#, regularization_strength=0.1)
            src_mu = th.mean(ranked_src, dim=1, keepdim=True)
            graph.srcdata['norm_h'] = F.normalize(ranked_src - src_mu, p=2, dim=-1)
            if isinstance(feat, tuple):
                ranked_dst = soft_rank(1000*F.normalize(feat_dst, p=2, dim=-1), regularization_strength=0.1)
                dst_mu = th.mean(ranked_dst, dim=1, keepdim=True)
                graph.dstdata['norm_h'] = F.normalize(ranked_dst - dst_mu, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            cos = graph.edata.pop('cos')
            e = self.beta * cos

        graph.edata['p'] = edge_softmax(graph, e)
        graph.update_all(fn.u_mul_e('h', 'p', 'm'), fn.sum('m', 'h'))
        return graph.dstdata.pop('h')
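
A small sketch (toy graph, random features) of the cosine-attention trick used above: u_dot_v on L2-normalized features yields exactly the cosine similarity along each edge:

import dgl
import dgl.function as fn
import torch
import torch.nn.functional as F

g = dgl.graph(([0, 1], [1, 0]))
h = torch.randn(2, 8)
g.ndata['norm_h'] = F.normalize(h, p=2, dim=-1)
g.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
cos = g.edata['cos'].squeeze(-1)   # cosine similarity between the endpoints of each edge
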
    def forward(self, graph, feat):
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    assert False
            if isinstance(feat, tuple):
                h_src = self.feat_drop(feat[0])
                h_dst = self.feat_drop(feat[1])
                if not hasattr(self, "fc_src"):
                    self.fc_src, self.fc_dst = self.fc, self.fc
                feat_src, feat_dst = h_src, h_dst
                feat_src = self.fc_src(h_src).view(-1, self._num_heads,
                                                   self._out_feats)
                feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads,
                                                   self._out_feats)
            else:
                h_src = self.feat_drop(feat)
                feat_src = h_src
                feat_src = self.fc(h_src).view(-1, self._num_heads,
                                               self._out_feats)
                if graph.is_block:
                    h_dst = h_src[:graph.number_of_dst_nodes()]
                    feat_dst = feat_src[:graph.number_of_dst_nodes()]
                else:
                    h_dst = h_src
                    feat_dst = feat_src

            if self.training and self.edge_drop > 0:
                perm = torch.randperm(graph.number_of_edges(),
                                      device=graph.device)
                bound = int(graph.number_of_edges() * self.edge_drop)
                eids = perm[bound:]
            else:
                eids = torch.arange(graph.number_of_edges(),
                                    device=graph.device)

            el = (feat_src * self.attn_l).sum(-1).unsqueeze(-1)
            graph.srcdata.update({"ft": feat_src, "el": el})
            # graph.dstdata.update({"er": er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            if self.attn_r is not None:
                er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
                graph.dstdata.update({"er": er})
                graph.apply_edges(fn.u_add_v("el", "er", "e"))
            else:
                graph.apply_edges(fn.copy_u("el", "e"))
            e = self.leaky_relu(graph.edata.pop("e"))

            # compute softmax

            graph.edata["a"] = torch.zeros_like(e)
            graph.edata["a"][eids] = self.attn_drop(
                edge_softmax(graph, e[eids], eids=eids))
            shp = graph.edata["gcn_norm"].shape + (1, ) * (feat_dst.dim() - 1)
            if self._norm == "sym":
                graph.edata["a"][
                    eids] = graph.edata["a"][eids] * torch.reshape(
                        graph.edata["gcn_norm_adjust"], shp)[eids]
            if self._norm == "avg":
                graph.edata["a"][eids] = (
                    graph.edata["a"][eids] +
                    torch.reshape(graph.edata["gcn_norm"], shp)[eids]) / 2

            hstack = [graph.dstdata["ft"]]

            for _ in range(self._K):
                # message passing
                graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))

                hstack.append(graph.dstdata["ft"])

            hstack = [
                h + self.position_emb[[k], :, :] for k, h in enumerate(hstack)
            ]
            a_l = (hstack[0] * self.hop_attn_l).sum(dim=-1).unsqueeze(-1)
            astack_r = [(feat_dst * self.hop_attn_r).sum(dim=-1).unsqueeze(-1)
                        for feat_dst in hstack]
            a = torch.cat([(a_r + a_l) for a_r in astack_r], dim=-1)
            # a = torch.sigmoid(a)
            a = self.leaky_relu(a)
            a = F.softmax(a, dim=-1)
            a = self.attn_drop(a)
            # a = F.dropout(a, p=0.5, training=self.training)
            rst = 0
            for i in range(a.shape[-1]):
                rst += hstack[i] * a[:, :, [i]]

            # residual
            if self.res_fc is not None:
                resval = self.res_fc(feat).view(h_dst.shape[0], -1,
                                                self._out_feats)
                rst = rst + resval
            # activation
            if self._activation is not None:
                rst = self._activation(rst)
            return rst
Example #30
    def forward(self, graph: dgl.DGLHeteroGraph, feat: tuple,
                dst_node_transformation_weight: nn.Parameter,
                src_node_transformation_weight: nn.Parameter,
                relation_embedding: torch.Tensor,
                relation_transformation_weight: nn.Parameter):
        r"""

        Parameters
        ----------
        graph : specific relational DGLHeteroGraph
        feat : pair of torch.Tensor
            The pair contains two tensors of shape (N_{in}, D_{in_{src}}) and (N_{out}, D_{in_{dst}}).
        dst_node_transformation_weight: Parameter (input_dst_dim, n_heads * hidden_dim)
        src_node_transformation_weight: Parameter (input_src_dim, n_heads * hidden_dim)
        relation_embedding: torch.Tensor, (relation_input_dim)
        relation_transformation_weight: Parameter (relation_input_dim, n_heads * 2 * hidden_dim)

        Returns
        -------
        torch.Tensor, shape (N, H, D_out), where H is the number of heads and D_out is the size of the output feature.
        """
        graph = graph.local_var()
        # Tensor, (N_src, input_src_dim)
        feat_src = self.dropout(feat[0])
        # Tensor, (N_dst, input_dst_dim)
        feat_dst = self.dropout(feat[1])
        # Tensor, (N_src, n_heads, hidden_dim) -> (N_src, input_src_dim) * (input_src_dim, n_heads * hidden_dim)
        feat_src = torch.matmul(feat_src, src_node_transformation_weight).view(
            -1, self._num_heads, self._out_feats)
        # Tensor, (N_dst, n_heads, hidden_dim) -> (N_dst, input_dst_dim) * (input_dst_dim, n_heads * hidden_dim)
        feat_dst = torch.matmul(feat_dst, dst_node_transformation_weight).view(
            -1, self._num_heads, self._out_feats)
        # Tensor, (n_heads, 2 * hidden_dim) -> (1, input_dst_dim) * (input_dst_dim, n_heads * hidden_dim)
        relation_attention_weight = torch.matmul(
            relation_embedding.unsqueeze(dim=0),
            relation_transformation_weight).view(self._num_heads,
                                                 2 * self._out_feats)

        # first decompose the weight vector into [a_l || a_r], then
        # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j. This implementation is much more efficient
        # Tensor, (N_dst, n_heads, 1),   (N_dst, n_heads, hidden_dim) * (n_heads, hidden_dim)
        e_dst = (feat_dst *
                 relation_attention_weight[:, :self._out_feats]).sum(
                     dim=-1, keepdim=True)
        # Tensor, (N_src, n_heads, 1),   (N_src, n_heads, hidden_dim) * (n_heads, hidden_dim)
        e_src = (feat_src *
                 relation_attention_weight[:, self._out_feats:]).sum(
                     dim=-1, keepdim=True)
        # (N_src, n_heads, hidden_dim), (N_src, n_heads, 1)
        graph.srcdata.update({'ft': feat_src, 'e_src': e_src})
        # (N_dst, n_heads, 1)
        graph.dstdata.update({'e_dst': e_dst})
        # compute edge attention, e_src and e_dst are a_src * Wh_src and a_dst * Wh_dst respectively.
        graph.apply_edges(fn.u_add_v('e_src', 'e_dst', 'e'))
        # shape (edges_num, heads, 1)
        e = self.leaky_relu(graph.edata.pop('e'))

        # compute softmax
        graph.edata['a'] = edge_softmax(graph, e)

        graph.update_all(fn.u_mul_e('ft', 'a', 'msg'), fn.sum('msg', 'feat'))
        # (N_dst, n_heads, hidden_dim), reshaped to (N_dst, n_heads * hidden_dim)
        dst_features = graph.dstdata.pop('feat').reshape(
            -1, self._num_heads * self._out_feats)

        dst_features = self.relu(dst_features)

        return dst_features