Example #1
def calc_weight(g):
    """计算行归一化的D^(-1/2)AD(-1/2)"""
    with g.local_scope():
        g.ndata['in_degree'] = g.in_degrees().float().pow(-0.5)
        g.ndata['out_degree'] = g.out_degrees().float().pow(-0.5)
        g.apply_edges(fn.u_mul_v('out_degree', 'in_degree', 'weight'))
        g.update_all(fn.copy_e('weight', 'msg'), fn.sum('msg', 'norm'))
        g.apply_edges(fn.e_div_v('weight', 'norm', 'weight'))
        return g.edata['weight']
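
A minimal usage sketch, assuming a toy 4-node cycle graph (not part of the original) and that dgl, torch, and dgl.function as fn are importable alongside calc_weight:

import dgl
import dgl.function as fn
import torch

src = torch.tensor([0, 1, 2, 3])
dst = torch.tensor([1, 2, 3, 0])
g = dgl.add_self_loop(dgl.graph((src, dst)))  # self-loops keep every degree > 0
w = calc_weight(g)   # one normalized weight per edge
print(w.shape)       # torch.Size([8]): 4 edges + 4 self-loops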
Example #2
def normalize_edge_weights(graph, device, num_ew_channels):
    degs = graph.in_degrees().float()
    degs = torch.clamp(degs, min=1)
    norm = torch.pow(degs, 0.5)
    norm = norm.to(device)
    graph.ndata["norm"] = norm.unsqueeze(1)
    graph.apply_edges(fn.e_div_u("feat", "norm", "feat"))
    graph.apply_edges(fn.e_div_v("feat", "norm", "feat"))
    for channel in range(num_ew_channels):
        graph.edata["feat_" +
                    str(channel)] = graph.edata["feat"][:, channel:channel + 1]
Example #3
def edge_softmax_fix(graph, score):
    def reduce_sum(nodes):
        accum = torch.sum(nodes.mailbox['temp'], 1)
        return {'out_sum': accum}

    graph = graph.local_var()
    graph.edata['out'] = score
    graph.edata['out'] = torch.exp(graph.edata['out'])
    graph.update_all(fn.copy_e('out', 'temp'), reduce_sum)
    graph.apply_edges(fn.e_div_v('out', 'out_sum', 'out'))
    out = graph.edata['out']
    return out
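
A quick sanity check on a made-up toy graph (the graph and random scores are assumptions): per destination node, the returned weights should sum to one.

import dgl
import dgl.function as fn
import torch

g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([2, 2, 0])))
score = torch.randn(g.num_edges(), 1)
alpha = edge_softmax_fix(g, score)
g.edata['a'] = alpha
g.update_all(fn.copy_e('a', 'm'), fn.sum('m', 's'))
print(g.ndata['s'])  # ~1.0 for every node that has incoming edges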
Example #4
def calc_weight(g):
    """
    Compute row_normalized(D^(-1/2)AD^(-1/2))
    """
    with g.local_scope():
        # per-edge weight out_deg(u)^(-1/2) * in_deg(v)^(-1/2), i.e. the nonzero entries of D^(-1/2) A D^(-1/2)
        g.ndata["in_deg"] = g.in_degrees().float().pow(-0.5)
        g.ndata["out_deg"] = g.out_degrees().float().pow(-0.5)
        g.apply_edges(fn.u_mul_v("out_deg", "in_deg", "weight"))
        # row-normalize weight
        g.update_all(fn.copy_e("weight", "msg"), fn.sum("msg", "norm"))
        g.apply_edges(fn.e_div_v("weight", "norm", "weight"))
        return g.edata["weight"]
Example #5
def expected_output():
    g.srcdata.update({'ft': feat_src, 'el': el})
    g.dstdata.update({'er': er})
    g.apply_edges(fn.u_add_v('el', 'er', 'e'))
    e = leaky_relu(g.edata.pop('e'))
    g.edata['out'] = th.exp(e)
    g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
    g.apply_edges(fn.e_div_v('out', 'out_sum', 'out1'))
    # Omit attn_drop for deterministic execution
    g.edata['a'] = g.edata['out1']
    # message passing
    g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
    rst = g.dstdata['ft']
    return rst
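
The fragment above relies on g, feat_src, el, er, leaky_relu, and th from its enclosing test scope. A self-contained sketch of the same computation, with toy shapes that are assumptions:

import dgl
import dgl.function as fn
import torch as th

g = dgl.add_self_loop(dgl.graph((th.tensor([0, 1]), th.tensor([1, 2]))))
num_heads, out_feats = 2, 4
feat_src = th.randn(g.num_nodes(), num_heads, out_feats)
el = th.randn(g.num_nodes(), num_heads, 1)
er = th.randn(g.num_nodes(), num_heads, 1)
leaky_relu = th.nn.LeakyReLU(0.2)

g.srcdata.update({'ft': feat_src, 'el': el})
g.dstdata.update({'er': er})
g.apply_edges(fn.u_add_v('el', 'er', 'e'))
e = leaky_relu(g.edata.pop('e'))
g.edata['out'] = th.exp(e)
g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
g.apply_edges(fn.e_div_v('out', 'out_sum', 'out1'))
g.edata['a'] = g.edata['out1']
g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
rst = g.dstdata['ft']  # shape: (num_nodes, num_heads, out_feats)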
Example #6
    def forward(self, g, feat_dict):
        funcs = {}
        for srctype, etype, dsttype in g.canonical_etypes:
            g.nodes[dsttype].data['h'] = feat_dict[dsttype]  # dst nodes' original feature
            g.nodes[srctype].data['h'] = feat_dict[srctype]
            g.nodes[srctype].data['t_h'] = self.W_T[etype](feat_dict[srctype])  # src nodes' transformed feature

            #compute the attention numerator (exp)
            g.apply_edges(fn.u_mul_v('t_h', 'h', 'x'), etype=etype)
            g.edges[etype].data['x'] = torch.exp(self.W_A[etype](
                g.edges[etype].data['x']))

            #first update to compute the attention denominator (\sum exp)
            funcs[etype] = (fn.copy_e('x', 'm'), fn.sum('m', 'att'))
        g.multi_update_all(funcs, 'sum')

        funcs = {}
        for srctype, etype, dsttype in g.canonical_etypes:
            # compute attention weights (numerator / denominator)
            g.apply_edges(fn.e_div_v('x', 'att', 'att'), etype=etype)
            # \sum(h0 * att) -> h1
            funcs[etype] = (fn.u_mul_e('h', 'att', 'm'), fn.sum('m', 'h'))
        #second update to obtain h1
        g.multi_update_all(funcs, 'sum')

        #apply activation, layernorm, and dropout
        feat_dict = {}
        for ntype in g.ntypes:
            feat_dict[ntype] = self.dropout(
                self.layernorm(F.relu_(g.nodes[ntype].data['h']))
            )  #apply activation, layernorm, and dropout

        return feat_dict
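
The two-pass scheme above hinges on multi_update_all; a stripped-down sketch on a toy heterograph (the type names are made up) shows the per-etype message functions and the cross-type reducer it expects:

import dgl
import dgl.function as fn
import torch

g = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (torch.tensor([0, 2]), torch.tensor([0, 1])),
})
g.edges['follows'].data['x'] = torch.ones(2, 1)
g.edges['plays'].data['x'] = torch.ones(2, 1)
funcs = {etype: (fn.copy_e('x', 'm'), fn.sum('m', 'att'))
         for etype in ['follows', 'plays']}
g.multi_update_all(funcs, 'sum')  # 'sum' is the cross-type reducer
print(g.nodes['user'].data['att'], g.nodes['game'].data['att'])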
Example #7
def heterograph_edge_softmax(graph, edge_types, edge_data):
    r"""Edge softmax for heterograph.

    For a node :math:`i`, edge softmax is an operation of computing

    .. math::
      a_{ij} = \frac{\exp(z_{ij})}{\sum_{j\in\mathcal{N}(i)}\exp(z_{ij})}

    where :math:`z_{ij}` is the signal (the logit, in softmax terms) of edge
    :math:`j\rightarrow i`, and :math:`\mathcal{N}(i)` is the set of nodes that have
    an edge to :math:`i`. The type of :math:`j` is ignored, i.e. the sum runs over
    all :math:`j` with an edge to :math:`i`, regardless of the node type of :math:`j`.

    .. code:: python

        score = dgl.EData(g, score)
        score_max = score.dst_max()  # of type dgl.NData
        score = score - score_max  # edge_sub_dst, ret dgl.EData
        score_sum = score.dst_sum()  # of type dgl.NData
        out = score / score_sum    # edge_div_dst, ret dgl.EData
        return out.data
    ""[summary]

    Returns:
        [type]: [description]
    """
    g = graph.local_var()

    #####################################################################################
    ## assign data
    # max_e = []
    # min_e = []
    # for etype, edata in zip(edge_types, edge_data):
    #    g.edges[etype].data["e"] = edata
    #    max_e.append(torch.max(edata))
    #    min_e.append(torch.min(edata))
    # max_e = max(max_e)
    # min_e = min(min_e)

    ## The softmax trick, making the exponential stable.
    ## see https://stackoverflow.com/questions/42599498/numercially-stable-softmax
    ## max_e > 64 to prevent overflow; min_e<-64 to prevent underflow
    ##
    ## Of course, we can apply the trick all the time, but here we choose to apply only
    ## in some conditions to save some time, since multi_update_all is really expensive.
    # if max_e > 64.0 or min_e < -64.0:
    #    # e max (fn.max operates on the axis of features from different nodes)
    #    g.multi_update_all(
    #        {etype: (fn.copy_e("e", "m"), fn.max("m", "emax")) for etype in edge_types},
    #        "max",
    #    )
    #    # subtract max and compute exponential
    #    for etype in edge_types:
    #        g.apply_edges(fn.e_sub_v("e", "emax", "e"), etype=etype)

    #####################################################################################
    for etype, edata in zip(edge_types, edge_data):
        g.edges[etype].data["e"] = edata

    g.multi_update_all(
        {
            etype: (fn.copy_e("e", "m"), fn.max("m", "emax"))
            for etype in edge_types
        }, "max")
    # subtract max and compute exponential
    for etype in edge_types:
        g.apply_edges(fn.e_sub_v("e", "emax", "e"), etype=etype)
        g.edges[etype].data["out"] = torch.exp(g.edges[etype].data["e"])

    #####################################################################################

    # e sum
    g.multi_update_all(
        {
            etype: (fn.copy_e("out", "m"), fn.sum("m", "out_sum"))
            for etype in edge_types
        },
        "sum",
    )

    a = []
    for etype in edge_types:
        g.apply_edges(fn.e_div_v("out", "out_sum", "a"), etype=etype)
        a.append(g.edges[etype].data["a"])

    return a
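
A sketch of calling the function on a toy heterograph; the 'bond'/'angle' type names and tensor shapes are illustrative assumptions, not from the original code:

import dgl
import dgl.function as fn
import torch

g = dgl.heterograph({
    ('atom', 'bond', 'atom'): (torch.tensor([0, 1]), torch.tensor([1, 0])),
    ('atom', 'angle', 'atom'): (torch.tensor([0, 1]), torch.tensor([1, 0])),
})
edge_types = ['bond', 'angle']
edge_data = [torch.randn(2, 1), torch.randn(2, 1)]
a = heterograph_edge_softmax(g, edge_types, edge_data)
# a[k] holds the normalized scores for edge_types[k]; per destination atom,
# the scores summed over both edge types add up to 1.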
Example #8
    def forward(self, graph, feat):
        graph = graph.local_var()
        h_src = h_dst = self.feat_drop(feat)
        feat = self.fc(h_src).view(-1, self._num_heads, self._out_feats)
        ell = (self.attn_l * feat).sum(dim=-1, keepdim=True)
        err = (self.attn_r * feat).sum(dim=-1, keepdim=True)
        g = graph
        g.srcdata.update({'ft': feat, 'el': ell})
        g.dstdata.update({'er': err})
        g.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(g.edata.pop('e'))
        g.edata['out'] = th.exp(e)
        g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
        g.apply_edges(fn.e_div_v('out', 'out_sum', 'out1'))
        # Omit attn_drop for deterministic execution
        g.edata['a'] = g.edata['out1']
        # message passing
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        dglrst = g.dstdata['ft']
        fusedrst = B.fused_gat(g, feat, ell, err, self.negative_slope)

        dgl_context = utils.to_dgl_context(feat.device)
        graph = graph._graph.get_immutable_gidx(dgl_context)
        with self.cm.zoomIn(namespace=[self, th],
                            graph=graph,
                            node_feats={'f': h_src},
                            edge_feats={}) as v:
            feat_src = [
                self.fc(n.f).view(self._num_heads, self._out_feats)
                for n in v.innbs
            ]
            el = [(nf * self.attn_l).sum(dim=-1, keepdim=True)
                  for nf in feat_src]
            er = (self.fc(v.f).view(self._num_heads, self._out_feats) *
                  self.attn_r).sum(dim=-1, keepdim=True)
            coeff = [th.exp(self.leaky_relu(l + er)) for l in el]
            s = sum(coeff)
            alpha = [c / s for c in coeff]
            rst = sum([ef[0] * ef[1] for ef in zip(alpha, feat_src)])
            self.cm.collect_output(rst)
        rst = self.cm.zoomOut()
        grad_out = th.ones_like(rst)
        egl_graer = grad(outputs=rst,
                         inputs=self.cm._executor.ts.tensor_map['V7'],
                         grad_outputs=grad_out,
                         retain_graph=True)
        egl_grael = grad(outputs=rst,
                         inputs=self.cm._executor.ts.tensor_map['V3'],
                         grad_outputs=grad_out,
                         retain_graph=True)
        dgl_graer = grad(outputs=dglrst,
                         inputs=err,
                         grad_outputs=grad_out,
                         retain_graph=True)
        dgl_grael = grad(outputs=dglrst,
                         inputs=ell,
                         grad_outputs=grad_out,
                         retain_graph=True)
        fused_graer = grad(outputs=fusedrst,
                           inputs=err,
                           grad_outputs=grad_out,
                           retain_graph=True)
        fused_grael = grad(outputs=fusedrst,
                           inputs=ell,
                           grad_outputs=grad_out,
                           retain_graph=True)
        print('rst close?', th.allclose(rst, dglrst), rst)
        #print('exp', g.edata['out'], 'div', g.edata['a'], 'rst', dglrst, 'feat', feat, 'ell', ell, 'err', err)
        print('\negl_graer', egl_graer, '\ndgl_graer', dgl_graer,
              '\nfused_graer', fused_graer, 'egl close with dgl?',
              th.allclose(egl_graer[0], dgl_graer[0]))
        print('\negl_grael', egl_grael, '\ndgl_grael', dgl_grael,
              '\nfused_grael', fused_grael, 'egl close with dgl?',
              th.allclose(egl_grael[0], dgl_grael[0]))
        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], -1,
                                             self._out_feats)
            rst = rst + resval
        # activation
        if self.activation:
            rst = self.activation(rst)
        return rst, dglrst
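
For reference, the hand-written attention pipeline this test checks appears to mirror what dgl.nn.GATConv packages up; a rough, self-contained sketch (toy graph and feature sizes are assumptions):

import dgl
import torch
from dgl.nn import GATConv

g = dgl.add_self_loop(dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))))
conv = GATConv(in_feats=8, out_feats=4, num_heads=2)
h = torch.randn(g.num_nodes(), 8)
out = conv(g, h)
print(out.shape)  # torch.Size([3, 2, 4]): (num_nodes, num_heads, out_feats)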