Example #1
def fused_gat(graph, feat_src, el, er, slope):
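    # Build an immutable graph index on the same device as feat_src, then
    # pre-allocate buffers shaped like the edges (exp), like el (s) and like
    # feat_src (ret) before handing everything to the FusedGat autograd Function.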
    g = graph._graph.get_immutable_gidx(utils.to_dgl_context(
        context(feat_src)))
    exp = el.new_empty([g.number_of_edges()] + list(el.size()[1:]))
    s = th.empty_like(el)
    ret = th.empty_like(feat_src)
    return FusedGat.apply(g, feat_src, el, er, s, exp, ret, slope)
Example #2
    def forward(ctx, g, score, eids):
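        # Edge softmax forward pass: for every destination node, subtract the
        # max incoming edge score, exponentiate, and normalize by the sum.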

        # remember to save the graph to backward cache before making it
        # a local variable
        if not is_all(eids):
            g = g.edge_subgraph(eids.long())

        n_nodes = g.number_of_nodes()
        n_edges = g.number_of_edges()
        gidx = g._graph.get_immutable_gidx(utils.to_dgl_context(score.device))
        ctx.backward_cache = n_nodes, n_edges, gidx

        # g.update_all(fn.copy_e('s', 'm'), fn.max('m', 'smax'))
        smax = F.copy_reduce("max", gidx, TargetCode.EDGE, score, n_nodes)
        # g.apply_edges(fn.e_sub_v('s', 'smax', 'out'))
        out = F.binary_reduce("none", "sub", gidx, TargetCode.EDGE,
                              TargetCode.DST, score, smax, n_edges)

        # g.edata['out'] = th.exp(g.edata['out'])
        out = th.exp(out)
        # g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
        out_sum = F.copy_reduce("sum", gidx, TargetCode.EDGE, out, n_nodes)
        # g.apply_edges(fn.e_div_v('out', 'out_sum', 'out'))
        out = F.binary_reduce("none", "div", gidx, TargetCode.EDGE,
                              TargetCode.DST, out, out_sum, n_edges)

        ctx.save_for_backward(out)
        return out
Example #3
    def forward(ctx, g, score, eids):
        """Forward function.
        Pseudo-code:
        .. code:: python
            score = dgl.EData(g, score)
            score_max = score.dst_max()  # of type dgl.NData
            score = score - score_max  # edge_sub_dst, ret dgl.EData
            score_sum = score.dst_sum()  # of type dgl.NData
            out = score / score_sum    # edge_div_dst, ret dgl.EData
            return out.data
        """
        # remember to save the graph to backward cache before making it
        # a local variable
        if not is_all(eids):
            g = g.edge_subgraph(eids.long())

        n_nodes = g.number_of_dst_nodes()
        n_edges = g.number_of_edges()

        # TODO(BarclayII): this is a temporary fix of memory leakage in PyTorch
        # in PR #1139.  We should investigate further on what was actually happening
        # when implementing EdgeSoftmax with message passing API instead of
        # operators.
        score_context = utils.to_dgl_context(score.device)
        if isinstance(g, DGLGraph):
            gidx = g._graph.get_immutable_gidx(score_context)
        elif isinstance(g, DGLHeteroGraph):
            assert g._graph.number_of_etypes() == 1, \
                "EdgeSoftmax only supports one edge type"
            gidx = g._graph.get_unitgraph(0, score_context)

        ctx.backward_cache = n_nodes, n_edges, gidx

        # g.update_all(fn.copy_e('s', 'm'), fn.max('m', 'smax'))
        smax = F.copy_reduce('max', gidx, TargetCode.EDGE, score, n_nodes)
        # g.apply_edges(fn.e_sub_v('s', 'smax', 'out'))
        out = F.binary_reduce('none', 'sub', gidx, TargetCode.EDGE,
                              TargetCode.DST, score, smax, n_edges)
        # g.edata['out'] = th.exp(g.edata['out'])
        out = th.exp(out)
        # g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
        out_sum = F.copy_reduce('sum', gidx, TargetCode.EDGE, out, n_nodes)
        # g.apply_edges(fn.e_div_v('out', 'out_sum', 'out'))
        out = F.binary_reduce('none', 'div', gidx, TargetCode.EDGE,
                              TargetCode.DST, out, out_sum, n_edges)

        ctx.save_for_backward(out)
        return out
Example #4
    def gat_layer_dgl(feat, weight, attn_l, attn_r, in_feat_len, out_feat_len):
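        # Note: g, num_v and n_edges are assumed to be captured from the
        # enclosing benchmark scope; they are not defined in this snippet.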
        feat2 = torch.mm(feat, weight)
        att_l = torch.mm(feat2, attn_l)
        att_r = torch.mm(feat2, attn_r)
        g.srcdata.update({'ft': feat2, 'el': att_l})
        g.dstdata.update({'er': att_r})
        g.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = torch.exp(F.leaky_relu(g.edata.pop('e'), 0.1))

        cont = utils.to_dgl_context(e.device)
        gidx = g._graph.get_immutable_gidx(cont)
        e_sum = backend.copy_reduce("sum", gidx, TargetCode.EDGE, e, num_v)
        att = backend.binary_reduce('none', 'div', gidx, TargetCode.EDGE,
                                    TargetCode.DST, e, e_sum, n_edges)
        g.edata['a'] = att
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        output = g.dstdata['ft']
        torch.cuda.synchronize()
        return output
Example #5
def nb_access_bench(graph, feat, node_map, deg_inc_node_map):
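    # Wrap the graph as an immutable graph index on feat's device and defer
    # the actual benchmark work to the NbAccess autograd Function.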
    g = graph._graph.get_immutable_gidx(utils.to_dgl_context(context(feat)))
    return NbAccess.apply(g, feat, node_map, deg_inc_node_map)
Example #6
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    g, author_rank, field_ids, true_relevance = load_rank_data(device)
    out_dim = g.nodes['field'].data['feat'].shape[1]
    add_node_feat(g, 'pretrained', args.node_embed_path, use_raw_id=True)
    field_paper = recall_paper(g.cpu(), field_ids,
                               args.num_recall)  # {field_id: [paper_id]}

    sampler = MultiLayerNeighborSampler([args.neighbor_size] * args.num_layers)
    sampler.set_output_context(to_dgl_context(device))
    triplet_collator = TripletNodeCollator(g, sampler)

    model = RHGNN(
        {ntype: g.nodes[ntype].data['feat'].shape[1]
         for ntype in g.ntypes}, args.num_hidden, out_dim, args.num_rel_hidden,
        args.num_rel_hidden, args.num_heads, g.ntypes, g.canonical_etypes,
        'author', args.num_layers, args.dropout).to(device)
    if args.load_path:
        model.load_state_dict(torch.load(args.load_path, map_location=device))
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     T_max=len(field_ids) *
                                                     args.epochs,
                                                     eta_min=args.lr / 100)
    warnings.filterwarnings(
        'ignore', 'Setting attributes on ParameterDict is not supported')
    for epoch in range(args.epochs):
        model.train()
        losses = []
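        # For each field, sample (field, true author, false author) triplets
        # and optimize the author embeddings with a triplet margin loss.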
        for f in tqdm(field_ids):
            false_author_ids = list(
                set(g.in_edges(field_paper[f], etype='writes')[0].tolist()) -
                set(author_rank[f]))
            triplets = sample_triplets(f, author_rank[f], false_author_ids,
                                       args.num_triplets).to(device)
            aid, blocks = triplet_collator.collate(triplets)
            author_embeds = model(blocks, blocks[0].srcdata['feat'])
            author_embeds = author_embeds / author_embeds.norm(dim=1,
                                                               keepdim=True)
            aid_map = {a: i for i, a in enumerate(aid.tolist())}
            anchor = g.nodes['field'].data['feat'][triplets[:, 0]]
            positive = author_embeds[[
                aid_map[a] for a in triplets[:, 1].tolist()
            ]]
            negative = author_embeds[[
                aid_map[a] for a in triplets[:, 2].tolist()
            ]]
            loss = F.triplet_margin_loss(anchor, positive, negative,
                                         args.margin)

            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        print('Epoch {:d} | Loss {:.4f}'.format(epoch,
                                                sum(losses) / len(losses)))
        torch.save(model.state_dict(), args.model_save_path)
        if epoch % args.eval_every == 0 or epoch == args.epochs - 1:
            print(
                METRICS_STR.format(*evaluate(
                    model, g, out_dim, sampler, args.batch_size, device,
                    field_ids, field_paper, author_rank, true_relevance)))
    torch.save(model.state_dict(), args.model_save_path)
    print('Model saved to', args.model_save_path)

    embeds = infer(model, g, 'author', out_dim, sampler, args.batch_size,
                   device)
    author_embed_save_path = DATA_DIR / 'rank/author_embed.pkl'
    torch.save(embeds.cpu(), author_embed_save_path)
    print('Author embeddings saved to', author_embed_save_path)
Example #7
    def forward(self, graph, feat):
        graph = graph.local_var()
        h_src = h_dst = self.feat_drop(feat)
        feat = self.fc(h_src).view(-1, self._num_heads, self._out_feats)
        ell = (self.attn_l * feat).sum(dim=-1, keepdim=True)
        err = (self.attn_r * feat).sum(dim=-1, keepdim=True)
        g = graph
        g.srcdata.update({'ft': feat, 'el': ell})
        g.dstdata.update({'er': err})
        g.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(g.edata.pop('e'))
        g.edata['out'] = th.exp(e)
        g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
        g.apply_edges(fn.e_div_v('out', 'out_sum', 'out1'))
        # Omit attn_drop for deterministic execution
        g.edata['a'] = g.edata['out1']
        # message passing
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        dglrst = g.dstdata['ft']
        fusedrst = B.fused_gat(g, feat, ell, err, self.negative_slope)

        dgl_context = utils.to_dgl_context(feat.device)
        graph = graph._graph.get_immutable_gidx(dgl_context)
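        # Recompute the same GAT aggregation node-by-node through the
        # zoomIn/zoomOut interface so that its output and gradients can be
        # compared against the DGL and fused-kernel results below.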
        with self.cm.zoomIn(namespace=[self, th],
                            graph=graph,
                            node_feats={'f': h_src},
                            edge_feats={}) as v:
            feat_src = [
                self.fc(n.f).view(self._num_heads, self._out_feats)
                for n in v.innbs
            ]
            el = [(nf * self.attn_l).sum(dim=-1, keepdim=True)
                  for nf in feat_src]
            er = (self.fc(v.f).view(self._num_heads, self._out_feats) *
                  self.attn_r).sum(dim=-1, keepdim=True)
            coeff = [th.exp(self.leaky_relu(l + er)) for l in el]
            s = sum(coeff)
            alpha = [c / s for c in coeff]
            rst = sum([ef[0] * ef[1] for ef in zip(alpha, feat_src)])
            self.cm.collect_output(rst)
        rst = self.cm.zoomOut()
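        # Compare gradients w.r.t. the attention terms (el/er) across the
        # three implementations: zoomIn-based, plain DGL, and the fused kernel.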
        grad_out = th.ones_like(rst)
        egl_graer = grad(outputs=rst,
                         inputs=self.cm._executor.ts.tensor_map['V7'],
                         grad_outputs=grad_out,
                         retain_graph=True)
        egl_grael = grad(outputs=rst,
                         inputs=self.cm._executor.ts.tensor_map['V3'],
                         grad_outputs=grad_out,
                         retain_graph=True)
        dgl_graer = grad(outputs=dglrst,
                         inputs=err,
                         grad_outputs=grad_out,
                         retain_graph=True)
        dgl_grael = grad(outputs=dglrst,
                         inputs=ell,
                         grad_outputs=grad_out,
                         retain_graph=True)
        fused_graer = grad(outputs=fusedrst,
                           inputs=err,
                           grad_outputs=grad_out,
                           retain_graph=True)
        fused_grael = grad(outputs=fusedrst,
                           inputs=ell,
                           grad_outputs=grad_out,
                           retain_graph=True)
        print('rst close?', th.allclose(rst, dglrst), rst)
        #print('exp', g.edata['out'], 'div', g.edata['a'], 'rst', dglrst, 'feat', feat, 'ell', ell, 'err', err)
        print('\negl_graer', egl_graer, '\ndgl_graer', dgl_graer,
              '\nfused_graer', fused_graer, 'egl close with dgl?',
              th.allclose(egl_graer[0], dgl_graer[0]))
        print('\negl_grael', egl_grael, '\ndgl_grael', dgl_grael,
              '\nfused_grael', fused_grael, 'egl close with dgl?',
              th.allclose(egl_grael[0], dgl_grael[0]))
        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], -1,
                                             self._out_feats)
            rst = rst + resval
        # activation
        if self.activation:
            rst = self.activation(rst)
        return rst, dglrst