Exemple #1
0
def test_out_subgraph(index_dtype):
    """Check ``dgl.out_subgraph`` on a heterograph built from four relations.

    The graph is assembled from one homogeneous relation ('follow') and three
    bipartite relations ('play', 'liked-by', 'flips').  The subgraph is seeded
    with users 0 and 1 and game 0; for every edge type the test compares the
    retained (src, dst) pairs against a fixed expectation and checks that the
    stored ``dgl.EID`` values match the edge IDs looked up in the parent graph.

    :param index_dtype: index dtype string forwarded to the graph
        constructors and compared against ``subg._idtype_str``
    """
    follow = dgl.graph(
        [(1,0),(2,0),(3,0),(0,1),(2,1),(3,1),(0,2)],
        'user', 'follow', index_dtype=index_dtype)
    play = dgl.bipartite(
        [(0,0),(0,1),(1,2),(3,2)],
        'user', 'play', 'game', index_dtype=index_dtype)
    liked_by = dgl.bipartite(
        [(2,0),(2,1),(2,2),(1,0),(1,3),(0,0)],
        'game', 'liked-by', 'user', index_dtype=index_dtype)
    flips = dgl.bipartite(
        [(0,0),(1,0),(2,0),(3,0)],
        'user', 'flips', 'coin', index_dtype=index_dtype)
    hg = dgl.hetero_from_relations([follow, play, liked_by, flips])

    subg = dgl.out_subgraph(hg, {'user' : [0,1], 'game' : 0})
    assert subg._idtype_str == index_dtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # Expected surviving edges per relation, checked in this order.
    expected_by_etype = [
        ('follow', {(1,0),(0,1),(0,2)}),
        ('play', {(0,0),(0,1),(1,2)}),
        ('liked-by', {(0,0)}),
        ('flips', {(0,0),(1,0)}),
    ]
    for etype, expected in expected_by_etype:
        src, dst = subg[etype].edges()
        found = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert found == expected
        # Stored edge IDs must agree with a lookup in the parent graph.
        parent_eids = hg[etype].edge_ids(src, dst)
        assert F.array_equal(parent_eids, subg[etype].edata[dgl.EID])
Exemple #2
0
def test_out_subgraph(idtype):
    """Check ``dgl.out_subgraph`` on a four-relation heterograph.

    The subgraph is seeded with users 0 and 1 and game 0.  For every edge type
    the retained (src, dst) pairs are compared against a fixed expectation and
    the stored ``dgl.EID`` values are compared against edge IDs looked up in
    the parent graph.

    :param idtype: index dtype passed to ``dgl.heterograph`` and expected on
        the resulting subgraph
    """
    relations = {
        ('user', 'follow', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]),
        ('game', 'liked-by', 'user'): ([2, 2, 2, 1, 1, 0], [0, 1, 2, 0, 3, 0]),
        ('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0])
    }
    hg = dgl.heterograph(relations, idtype=idtype)

    subg = dgl.out_subgraph(hg, {'user' : [0,1], 'game' : 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # Expected surviving edges per relation, checked in this order.
    expected_by_etype = [
        ('follow', {(1,0),(0,1),(0,2)}),
        ('play', {(0,0),(0,1),(1,2)}),
        ('liked-by', {(0,0)}),
        ('flips', {(0,0),(1,0)}),
    ]
    for etype, expected in expected_by_etype:
        src, dst = subg[etype].edges()
        found = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert found == expected
        # Stored edge IDs must agree with a lookup in the parent graph.
        parent_eids = hg[etype].edge_ids(src, dst)
        assert F.array_equal(parent_eids, subg[etype].edata[dgl.EID])
def evaluate(g, field_ids, author_rank, true_relevance, field_paper):
    """Rank authors for each field and pass the predictions to ``calc_metrics``.

    For every field, the papers listed in ``field_paper`` are scored by the
    product of their 'feat' vectors with the field's 'feat' vector.  The
    scores are then summed onto authors along the
    ('paper', 'writes_rev', 'author') relation of the out-subgraph induced by
    those papers.

    :param g: heterograph whose 'field' and 'paper' nodes carry a 'feat'
        feature and which has a ('paper', 'writes_rev', 'author') relation
    :param field_ids: iterable of field ids to evaluate
    :param author_rank: forwarded unchanged to ``calc_metrics``
    :param true_relevance: forwarded unchanged to ``calc_metrics``
    :param field_paper: mapping from field id to the paper ids used for it
    :return: the result of ``calc_metrics``; ``predict_rank`` maps each field
        id to ``(original author ids, summed author scores)``
    """
    field_feat = g.nodes['field'].data['feat']
    # Loop-invariant lookup, fetched once instead of on every iteration.
    paper_feat = g.nodes['paper'].data['feat']
    apg = g['paper', 'writes_rev', 'author']

    predict_rank = {}
    for f in field_ids:
        pid = field_paper[f]
        paper_score = torch.matmul(paper_feat[pid], field_feat[f])
        sg = dgl.out_subgraph(apg, {'paper': pid}, relabel_nodes=True)
        # NOTE(review): assigning scores positionally assumes the relabeled
        # 'paper' nodes of sg appear in the same order (and number) as pid --
        # confirm against dgl.out_subgraph's relabeling behaviour.
        sg.nodes['paper'].data['score'] = paper_score
        # Sum each paper's score onto the authors it points to.
        sg.update_all(fn.copy_u('score', 's'), fn.sum('s', 's'))
        author_data = sg.nodes['author'].data
        predict_rank[f] = (author_data[dgl.NID], author_data['s'])
    return calc_metrics(field_ids, author_rank, true_relevance, predict_rank)
Exemple #4
0
def recall_paper(g, field_ids, num_recall):
    """Precompute the papers recalled for each target field.

    Field-paper similarity is the dot product of the 'feat' vectors at the two
    ends of every 'has_field_rev' edge leaving a target field; pairs without
    such an edge keep similarity 0.  The ``num_recall`` highest-scoring papers
    are returned per field.

    :param g: DGLGraph heterogeneous graph
    :param field_ids: List[int] target field ids
    :param num_recall: number of papers recalled per field
    :return: Dict[int, List[int]] {field_id: [paper_id]}
    """
    field_graph = dgl.out_subgraph(
        g['has_field_rev'], {'field': field_ids}, relabel_nodes=True)
    field_graph.apply_edges(fn.u_dot_v('feat', 'feat', 's'))

    # NOTE(review): the relabeled field index is used directly as the row
    # index, which presumes the relabeled 'field' nodes follow the order of
    # field_ids -- confirm against dgl.out_subgraph's relabeling behaviour.
    field_idx, paper_idx = field_graph.edges()
    # Map relabeled paper indices back to paper ids in the parent graph.
    orig_paper = field_graph.nodes['paper'].data[dgl.NID][paper_idx]
    edge_score = field_graph.edata['s'].squeeze(dim=1)

    # NOTE(review): allocated on the default device; presumably g lives on
    # CPU at this point -- verify against the caller.
    similarity = torch.zeros(len(field_ids), g.num_nodes('paper'))
    similarity[field_idx, orig_paper] = edge_score

    top_papers = similarity.topk(num_recall, dim=1)[1]
    return {fid: top_papers[row].tolist() for row, fid in enumerate(field_ids)}
Exemple #5
0
def test_out_subgraph(idtype):
    """Check ``dgl.out_subgraph`` in default, ``store_ids=False`` and relabeled modes.

    Three scenarios are covered on the same four-relation heterograph:

    1. Default call: retained edges and stored ``dgl.EID`` are checked per
       edge type, and no ``dgl.NID`` is stored on any node type.
    2. ``store_ids=False``: neither ``dgl.EID`` nor ``dgl.NID`` is stored.
    3. ``relabel_nodes=True``: edges are mapped back through ``dgl.NID``
       before being compared, and per-type node counts are checked.

    :param idtype: index dtype passed to ``dgl.heterograph`` and expected on
        the resulting subgraphs
    """
    relations = {
        ('user', 'follow', 'user'):
        ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]),
        ('game', 'liked-by', 'user'):
        ([2, 2, 2, 1, 1, 0], [0, 1, 2, 0, 3, 0]),
        ('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0])
    }
    hg = dgl.heterograph(relations, idtype=idtype)

    # --- Default behaviour: node IDs are kept, edge IDs are stored ---
    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    plain_cases = [
        ('follow', {(1, 0), (0, 1), (0, 2)}),
        ('play', {(0, 0), (0, 1), (1, 2)}),
        ('liked-by', {(0, 0)}),
        ('flips', {(0, 0), (1, 0)}),
    ]
    for etype, expected in plain_cases:
        src, dst = subg[etype].edges()
        found = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert found == expected
        # Stored edge IDs must agree with a lookup in the parent graph.
        assert F.array_equal(hg[etype].edge_ids(src, dst),
                             subg[etype].edata[dgl.EID])
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # --- store_ids=False: no ID bookkeeping on edges or nodes ---
    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0}, store_ids=False)
    for etype in subg.canonical_etypes:
        assert dgl.EID not in subg.edges[etype].data
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # --- relabel_nodes=True: endpoints are translated back via dgl.NID ---
    subg = dgl.out_subgraph(hg, {'user': [1], 'game': 0}, relabel_nodes=True)
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # (edge type, source node type, destination node type, expected edges)
    relabel_cases = [
        ('follow', 'user', 'user', {(1, 0)}),
        ('play', 'user', 'game', {(1, 2)}),
        ('liked-by', 'game', 'user', {(0, 0)}),
        ('flips', 'user', 'coin', {(1, 0)}),
    ]
    for etype, src_type, dst_type, expected in relabel_cases:
        src, dst = subg[etype].edges()
        parent_src = F.gather_row(subg.nodes[src_type].data[dgl.NID], src)
        parent_dst = F.gather_row(subg.nodes[dst_type].data[dgl.NID], dst)
        found = set(
            zip(list(F.asnumpy(parent_src)), list(F.asnumpy(parent_dst))))
        assert found == expected
        assert F.array_equal(hg[etype].edge_ids(parent_src, parent_dst),
                             subg[etype].edata[dgl.EID])

    assert subg.num_nodes('user') == 2
    assert subg.num_nodes('game') == 2
    assert subg.num_nodes('coin') == 1
Exemple #6
0
    # Excerpt of a function body (the enclosing ``def`` is not part of this
    # snippet).  It samples "new" nodes from the tail of the edge list and
    # collects the edge IDs that involve them.
    num_edges = data.num_edges()

    # NOTE(review): duplicate of the assignment above.
    num_edges = data.num_edges()
    # Edge index that marks the start of the test portion.
    trainval_div = int(VALID_SPLIT * num_edges)

    # Select new nodes from the test set; their edges are removed below.
    # Timestamp of the edge sitting at the split index.
    test_split_ts = data.edata['timestamp'][trainval_div]
    # Unique endpoints (sources and destinations) of all edges from the split
    # index onward.
    test_nodes = torch.cat(
        [data.edges()[0][trainval_div:],
         data.edges()[1][trainval_div:]]).unique().numpy()
    # Sample 10% of those nodes, without replacement, as the "new" nodes.
    test_new_nodes = np.random.choice(test_nodes,
                                      int(0.1 * len(test_nodes)),
                                      replace=False)

    # Subgraphs of the edges entering / leaving the sampled nodes.
    in_subg = dgl.in_subgraph(data, test_new_nodes)
    out_subg = dgl.out_subgraph(data, test_new_nodes)
    # Remove edges that happen before the test split so that connection
    # information about the new nodes cannot be learned from them.
    new_node_in_eid_delete = in_subg.edata[dgl.EID][
        in_subg.edata['timestamp'] < test_split_ts]
    new_node_out_eid_delete = out_subg.edata[dgl.EID][
        out_subg.edata['timestamp'] < test_split_ts]
    new_node_eid_delete = torch.cat(
        [new_node_in_eid_delete, new_node_out_eid_delete]).unique()

    # Work on a deep copy so that ``data`` itself is left untouched.
    graph_new_node = copy.deepcopy(data)
    # Relative order preserved.
    graph_new_node.remove_edges(new_node_eid_delete)

    # For the graph without new nodes, collect every edge ID touching a
    # sampled node (the removal itself presumably follows beyond this excerpt).
    in_eid_delete = in_subg.edata[dgl.EID]
    out_eid_delete = out_subg.edata[dgl.EID]