Example #1
def construct_graph(p_p_g, a_a_g, p_a_g):
    p_p_edges = p_p_g.edge_index
    p_p_edges = utils.sort_edge_index(p_p_edges)[0]
    p_p_edges = utils.to_undirected(p_p_edges)
    p_p_edges = utils.remove_self_loops(p_p_edges)[0]
    a_a_edges = a_a_g.edge_index
    a_a_edges = utils.sort_edge_index(a_a_edges)[0]
    a_a_edges = utils.to_undirected(a_a_edges)
    a_a_edges = utils.remove_self_loops(a_a_edges)[0]
    p_a_edges = p_a_g.edge_index
    p_a_edges = utils.sort_edge_index(p_a_edges)[0]
    p_a_edges = utils.remove_self_loops(p_a_edges)[0]
    paper_paper_graph = dgl.graph((p_p_edges[0], p_p_edges[1]), 'paper', 'pp')
    author_author_graph = dgl.graph((a_a_edges[0], a_a_edges[1]), 'author',
                                    'aa')
    paper_author_graph = dgl.bipartite(
        (p_a_edges[0], p_a_edges[1]),
        'paper',
        'pa',
        'author',
        num_nodes=(paper_paper_graph.number_of_nodes(),
                   author_author_graph.number_of_nodes()))
    author_paper_graph = dgl.bipartite(
        (p_a_edges[1], p_a_edges[0]),
        'author',
        'ap',
        'paper',
        num_nodes=(author_author_graph.number_of_nodes(),
                   paper_paper_graph.number_of_nodes()))
    hg = dgl.hetero_from_relations([
        author_author_graph, author_paper_graph, paper_author_graph,
        paper_paper_graph
    ])

    return hg
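A hypothetical invocation of construct_graph (not part of the original listing), assuming utils refers to torch_geometric.utils, the three inputs are PyTorch Geometric Data objects exposing .edge_index, and the legacy DGL 0.4 API used throughout these examples:

import torch
from torch_geometric.data import Data

# Toy inputs: 3 papers, 2 authors (sizes chosen arbitrarily for this sketch).
p_p = Data(edge_index=torch.tensor([[0, 1], [1, 2]]))        # paper-paper edges
a_a = Data(edge_index=torch.tensor([[0], [1]]))              # author-author edge
p_a = Data(edge_index=torch.tensor([[0, 1, 2], [1, 0, 1]]))  # paper-author edges

hg = construct_graph(p_p, a_a, p_a)
print(hg.canonical_etypes)
# expected relations: ('author', 'aa', 'author'), ('author', 'ap', 'paper'),
#                     ('paper', 'pa', 'author'), ('paper', 'pp', 'paper')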
Example #2
def test_out_subgraph(index_dtype):
    g1 = dgl.graph([(1,0),(2,0),(3,0),(0,1),(2,1),(3,1),(0,2)], 'user', 'follow', index_dtype=index_dtype)
    g2 = dgl.bipartite([(0,0),(0,1),(1,2),(3,2)], 'user', 'play', 'game', index_dtype=index_dtype)
    g3 = dgl.bipartite([(2,0),(2,1),(2,2),(1,0),(1,3),(0,0)], 'game', 'liked-by', 'user', index_dtype=index_dtype)
    g4 = dgl.bipartite([(0,0),(1,0),(2,0),(3,0)], 'user', 'flips', 'coin', index_dtype=index_dtype)
    hg = dgl.hetero_from_relations([g1, g2, g3, g4])
    subg = dgl.out_subgraph(hg, {'user' : [0,1], 'game' : 0})
    assert subg._idtype_str == index_dtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg['follow'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(1,0),(0,1),(0,2)}
    assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
    u, v = subg['play'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0,0),(0,1),(1,2)}
    assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
    u, v = subg['liked-by'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0,0)}
    assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
    u, v = subg['flips'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0,0),(1,0)}
    assert F.array_equal(hg['flips'].edge_ids(u, v), subg['flips'].edata[dgl.EID])
Example #3
def load_dblp(num_walks, metapaths):
    with open('../dataset/DBLP/output/DBLP_Metapath2vec.pickle', 'rb') as f:
        a_list, p_list, c_list, node_list = pickle.load(f)
        pa_list, pc_list = pickle.load(f)

    print(len(pa_list))
    print(len(pc_list))

    author_ids = [node_list.index(i) for i in a_list]

    # Build the heterogeneous graph
    pa = dgl.bipartite(pa_list, 'paper', 'pa', 'author')
    ap = dgl.bipartite(transpose(pa_list), 'author', 'ap', 'paper')
    pc = dgl.bipartite(pc_list, 'paper', 'pc', 'conf')
    cp = dgl.bipartite(transpose(pc_list), 'conf', 'cp', 'paper')
    hg = dgl.hetero_from_relations([pa, ap, pc, cp])

    # Random walks over the metapaths
    sentences = []
    for metapath in metapaths:
        traces, types = dgl.sampling.random_walk(hg,
                                                 author_ids * num_walks,
                                                 metapath=metapath)
        for s in traces.tolist():
            sentences.append([node_list[i] for i in s])

    return hg, sentences, node_list
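As a hypothetical follow-up (not in the original code), the walk traces could be fed to a skip-gram model, e.g. gensim's Word2Vec (gensim 4.x argument names assumed); the metapath below is likewise only an assumed sequence of the 'ap'/'pa' edge types defined above, and the pickle file inside load_dblp is assumed to exist:

from gensim.models import Word2Vec

hg, sentences, node_list = load_dblp(num_walks=10, metapaths=[['ap', 'pa'] * 2])
model = Word2Vec(sentences, vector_size=128, window=5, min_count=0, sg=1, workers=4)
vec = model.wv[node_list[0]]  # embedding of one node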
Example #4
def test_pickling_batched_heterograph():
    # copied from test_heterograph.create_test_heterograph()
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    plays_g = dgl.bipartite(plays_spmat, 'user', 'plays', 'game')
    wishes_g = dgl.bipartite(wishes_nx, 'user', 'wishes', 'game')
    develops_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops',
                               'game')
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])
    g2 = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])

    g.nodes['user'].data['u_h'] = F.randn((3, 4))
    g.nodes['game'].data['g_h'] = F.randn((2, 5))
    g.edges['plays'].data['p_h'] = F.randn((4, 6))
    g2.nodes['user'].data['u_h'] = F.randn((3, 4))
    g2.nodes['game'].data['g_h'] = F.randn((2, 5))
    g2.edges['plays'].data['p_h'] = F.randn((4, 6))

    bg = dgl.batch_hetero([g, g2])
    new_bg = _reconstruct_pickle(bg)
    test_utils.check_graph_equal(bg, new_bg)
Example #5
def load_dblp(remove_self_loop):
    with open('../dataset/DBLP/output/DBLP_HAN.pickle', 'rb') as f:
        a_list, p_list, c_list, node_list = pickle.load(f)
        pa_list, pc_list = pickle.load(f)
        author_features = pickle.load(f)
        labels = pickle.load(f)

    # Build the heterogeneous graph
    pa = dgl.bipartite(pa_list, 'paper', 'pa', 'author')
    ap = dgl.bipartite(transpose(pa_list), 'author', 'ap', 'paper')
    pc = dgl.bipartite(pc_list, 'paper', 'pc', 'conf')
    cp = dgl.bipartite(transpose(pc_list), 'conf', 'cp', 'paper')
    hg = dgl.hetero_from_relations([pa, ap, pc, cp])

    features = torch.FloatTensor(author_features)
    labels = torch.LongTensor(labels)

    num_class = 4

    alls = list(range(len(a_list)))
    train_idx, rest_idx, _, rest_labels = train_test_split(alls,
                                                           labels,
                                                           test_size=0.2,
                                                           random_state=52)
    eval_idx, test_idx, _, _ = train_test_split(rest_idx,
                                                rest_labels,
                                                test_size=0.5,
                                                random_state=40)

    num_nodes = hg.number_of_nodes('author')
    train_mask = get_binary_mask(num_nodes, train_idx)
    eval_mask = get_binary_mask(num_nodes, eval_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)
    return hg, features, labels, num_class, train_mask, test_mask, eval_mask, node_list
Example #6
 def obtain_Bs(self, head_b, tail_b):
     n_edges = head_b.shape[0]
     heads, tails = head_b, tail_b
     neg_tails = self.weights.multinomial(self.num_negs * n_edges,
                                          replacement=True)
     neg_heads = torch.LongTensor(heads).view(-1, 1).expand(
         n_edges, self.num_negs).flatten()
     spmat_p = coo_matrix((np.ones(heads.shape[0]), (heads, tails)),
                          shape=(self.g.number_of_nodes('user'),
                                 self.g.number_of_nodes('item')))
     spmat_pr = coo_matrix((np.ones(heads.shape[0]), (tails, heads)),
                           shape=(self.g.number_of_nodes('item'),
                                  self.g.number_of_nodes('user')))
     spmat_neg = coo_matrix(
         (np.ones(heads.shape[0]), (neg_heads, neg_tails)),
         shape=(self.g.number_of_nodes('user'),
                self.g.number_of_nodes('item')))
     pos_graph = dgl.bipartite(spmat_p, 'user', 'edit', 'item')
     pos_graph_r = dgl.bipartite(spmat_pr, 'item', 'edit', 'user')
     neg_graph = dgl.bipartite(spmat_neg, 'user', 'edit', 'item')
     # pos_graph, neg_graph = dgl.compact_graphs([pos_graph, pos_graph_r, neg_graph])  # using this would drop nodes!
     # the original node IDs (NID) can still be read
     pos_graph = pos_graph.edge_subgraph({
         ('user', 'edit', 'item'):
         list(range(pos_graph.number_of_edges()))
     })
     pos_graph_r = pos_graph_r.edge_subgraph({
         ('item', 'edit', 'user'):
         list(range(pos_graph_r.number_of_edges()))
     })
     neg_graph = neg_graph.edge_subgraph({
         ('user', 'edit', 'item'):
         list(range(neg_graph.number_of_edges()))
     })
     return pos_graph, pos_graph_r, neg_graph
Example #7
def create_test_graph(idtype):
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows', idtype=idtype)
    plays_g = dgl.bipartite(plays_spmat,
                            'user',
                            'plays',
                            'game',
                            idtype=idtype)
    wishes_g = dgl.bipartite(wishes_nx,
                             'user',
                             'wishes',
                             'game',
                             idtype=idtype)
    develops_g = dgl.bipartite([(0, 0), (1, 1)],
                               'developer',
                               'develops',
                               'game',
                               idtype=idtype)
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])
    return g
Example #8
def test_pickling_heterograph():
    # copied from test_heterograph.create_test_heterograph()
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    plays_g = dgl.bipartite(plays_spmat, 'user', 'plays', 'game')
    wishes_g = dgl.bipartite(wishes_nx, 'user', 'wishes', 'game')
    develops_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops',
                               'game')
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])

    g.nodes['user'].data['u_h'] = F.randn((3, 4))
    g.nodes['game'].data['g_h'] = F.randn((2, 5))
    g.edges['plays'].data['p_h'] = F.randn((4, 6))

    new_g = _reconstruct_pickle(g)
    _assert_is_identical_hetero(g, new_g)

    block = dgl.to_block(g, {'user': [1, 2], 'game': [0, 1], 'developer': []})
    new_block = _reconstruct_pickle(block)
    _assert_is_identical_hetero(block, new_block)
    assert block.is_block
    assert new_block.is_block
Example #9
def test_isolated_nodes(index_dtype):
    g = dgl.graph([(0, 1), (1, 2)], num_nodes=5, index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    assert g.number_of_nodes() == 5

    # Test backward compatibility
    g = dgl.graph([(0, 1), (1, 2)], card=5, index_dtype=index_dtype)
    assert g.number_of_nodes() == 5

    g = dgl.bipartite([(0, 2), (0, 3), (1, 2)],
                      'user',
                      'plays',
                      'game',
                      num_nodes=(5, 7),
                      index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    assert g.number_of_nodes('user') == 5
    assert g.number_of_nodes('game') == 7

    # Test backward compatibility
    g = dgl.bipartite([(0, 2), (0, 3), (1, 2)],
                      'user',
                      'plays',
                      'game',
                      card=(5, 7),
                      index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    assert g.number_of_nodes('user') == 5
    assert g.number_of_nodes('game') == 7
Example #10
def test_sage_conv(aggre_type):
    ctx = F.ctx()
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.1))
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    dst_dim = 5 if aggre_type != 'gcn' else 10
    sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
    feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 200

    # Test the case for graphs without edges
    g = dgl.bipartite([], num_nodes=(5, 3))
    sage = nn.SAGEConv((3, 3), 2, 'gcn')
    feat = (F.randn((5, 3)), F.randn((3, 3)))
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 3
    for aggre_type in ['mean', 'pool', 'lstm']:
        sage = nn.SAGEConv((3, 1), 2, aggre_type)
        feat = (F.randn((5, 3)), F.randn((3, 1)))
        h = sage(g, feat)
        assert h.shape[-1] == 2
        assert h.shape[0] == 3
Example #11
def construct_graph():
    api_ids = []
    api_names = []
    app_ids = []
    app_names = []
    
    f_3 = open(os.path.join(path, "id_api_320.txt"), encoding='utf-8')
    f_4 = open(os.path.join(path, "id_app_320.txt"), encoding='utf-8')
    while True:
        z = f_3.readline()
        if not z:
            break
        z = z.strip().split()
        identity = int(z[0])
        api_ids.append(identity)
        api_names.append(z[1])
    while True:
        w = f_4.readline()
        if not w:
            break
        w = w.strip().split()
        identity = int(w[0])
        app_ids.append(identity)
        app_names.append(w[1])

    f_3.close()
    f_4.close()


    api_ids_invmap = {x: i for i, x in enumerate(api_ids)}
    app_ids_invmap = {x: i for i, x in enumerate(app_ids)}

    api_api_src = []
    api_api_dst = []
    api_app_src = []
    api_app_dst = []
    f_1 = open(os.path.join(path, "same_block_api_320.txt"), "r") # B matrix
    f_2 = open(os.path.join(path, "api_app_320.txt"), "r") # A matrix
    
    for x in f_1:
        x = x.split()
        x[0] = int(x[0])
        x[1] = int(x[1].strip('\n'))
        api_api_src.append(api_ids_invmap[x[0]])
        api_api_dst.append(api_ids_invmap[x[1]])
    for y in f_2:
        y = y.split()
        y[0] = int(y[0])
        y[1] = int(y[1].strip('\n'))
        api_app_src.append(api_ids_invmap[y[0]])
        api_app_dst.append(app_ids_invmap[y[1]])
    f_1.close()
    f_2.close()

    app_api = dgl.bipartite((api_app_dst, api_app_src), 'app', 'app_api', 'api1')
    api_app = dgl.bipartite((api_app_src, api_app_dst), 'api2', 'api_app', 'app')
    api_api = dgl.bipartite((api_api_src, api_api_dst), 'api1', 'api_api', 'api2')
    hg = dgl.hetero_from_relations([app_api, api_api, api_app])
    return hg, api_names, app_names
Example #12
def load_acm_raw(remove_self_loop):
    assert not remove_self_loop
    url = 'dataset/ACM.mat'
    data_path = get_download_dir() + '/ACM.mat'
    download(_get_dgl_url(url), path=data_path)

    data = sio.loadmat(data_path)
    p_vs_l = data['PvsL']  # paper-field?
    p_vs_a = data['PvsA']  # paper-author
    p_vs_t = data['PvsT']  # paper-term, bag of words
    p_vs_c = data['PvsC']  # paper-conference, labels come from that

    # We assign
    # (1) KDD papers as class 0 (data mining),
    # (2) SIGMOD and VLDB papers as class 1 (database),
    # (3) SIGCOMM and MOBICOMM papers as class 2 (communication)
    conf_ids = [0, 1, 9, 10, 13]
    label_ids = [0, 1, 2, 2, 1]

    p_vs_c_filter = p_vs_c[:, conf_ids]
    p_selected = (p_vs_c_filter.sum(1) != 0).A1.nonzero()[0]
    p_vs_l = p_vs_l[p_selected]
    p_vs_a = p_vs_a[p_selected]
    p_vs_t = p_vs_t[p_selected]
    p_vs_c = p_vs_c[p_selected]

    pa = dgl.bipartite(p_vs_a, 'paper', 'pa', 'author')
    ap = dgl.bipartite(p_vs_a.transpose(), 'author', 'ap', 'paper')
    pl = dgl.bipartite(p_vs_l, 'paper', 'pf', 'field')
    lp = dgl.bipartite(p_vs_l.transpose(), 'field', 'fp', 'paper')
    hg = dgl.hetero_from_relations([pa, ap, pl, lp])

    features = torch.FloatTensor(p_vs_t.toarray())

    pc_p, pc_c = p_vs_c.nonzero()
    labels = np.zeros(len(p_selected), dtype=np.int64)
    for conf_id, label_id in zip(conf_ids, label_ids):
        labels[pc_p[pc_c == conf_id]] = label_id
    labels = torch.LongTensor(labels)

    num_classes = 3

    float_mask = np.zeros(len(pc_p))
    for conf_id in conf_ids:
        pc_c_mask = (pc_c == conf_id)
        float_mask[pc_c_mask] = np.random.permutation(
            np.linspace(0, 1, pc_c_mask.sum()))
    train_idx = np.where(float_mask <= 0.2)[0]
    val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0]
    test_idx = np.where(float_mask > 0.3)[0]

    num_nodes = hg.number_of_nodes('paper')
    train_mask = get_binary_mask(num_nodes, train_idx)
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    return hg, features, labels, num_classes, train_idx, val_idx, test_idx, \
             train_mask, val_mask, test_mask
Example #13
def load_acm_raw():
    from dgl.data.utils import download, get_download_dir, _get_dgl_url
    from scipy import io as sio
    url = 'dataset/ACM.mat'
    data_path = get_download_dir() + '/ACM.mat'
    download(_get_dgl_url(url), path=data_path)

    data = sio.loadmat(data_path)
    p_vs_l = data['PvsL']  # paper-field?
    p_vs_a = data['PvsA']  # paper-author
    p_vs_t = data['PvsT']  # paper-term, bag of words
    p_vs_c = data['PvsC']  # paper-conference, labels come from that

    # We assign
    # (1) KDD papers as class 0 (data mining),
    # (2) SIGMOD and VLDB papers as class 1 (database),
    # (3) SIGCOMM and MOBICOMM papers as class 2 (communication)
    conf_ids = [0, 1, 9, 10, 13]
    label_ids = [0, 1, 2, 2, 1]

    p_vs_c_filter = p_vs_c[:, conf_ids]
    p_selected = (p_vs_c_filter.sum(1) != 0).A1.nonzero()[0]
    p_vs_l = p_vs_l[p_selected]
    p_vs_a = p_vs_a[p_selected]
    p_vs_t = p_vs_t[p_selected]
    p_vs_c = p_vs_c[p_selected]

    pa = dgl.bipartite(p_vs_a, 'paper', 'pa', 'author')
    pl = dgl.bipartite(p_vs_l, 'paper', 'pf', 'field')
    gs = [pa, pl]
    hg = dgl.hetero_from_relations(gs)

    features = torch.FloatTensor(p_vs_t.toarray())

    pc_p, pc_c = p_vs_c.nonzero()
    labels = np.zeros(len(p_selected), dtype=np.int64)
    for conf_id, label_id in zip(conf_ids, label_ids):
        labels[pc_p[pc_c == conf_id]] = label_id
    labels = torch.LongTensor(labels)

    num_classes = 3

    float_mask = np.zeros(len(pc_p))
    for conf_id in conf_ids:
        pc_c_mask = (pc_c == conf_id)
        float_mask[pc_c_mask] = np.random.permutation(
            np.linspace(0, 1, pc_c_mask.sum()))
    train_idx = np.where(float_mask <= 0.2)[0]
    val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0]
    test_idx = np.where(float_mask > 0.3)[0]

    hg.nodes["paper"].data["feat"] = features

    return hg, labels, num_classes, train_idx, val_idx, test_idx
Example #14
    def _generate_enc_graph(self, rating_pairs, rating_values, add_support=False):
        user_movie_R = np.zeros((self._num_user, self._num_movie), dtype=np.float32)
        user_movie_R[rating_pairs] = rating_values
        movie_user_R = user_movie_R.transpose()

        rating_graphs = []
        rating_row, rating_col = rating_pairs
        for rating in self.possible_rating_values:
            ridx = np.where(rating_values == rating)
            rrow = rating_row[ridx]
            rcol = rating_col[ridx]
            bg = dgl.bipartite((rrow, rcol), 'user', str(rating), 'movie',
                               card=(self._num_user, self._num_movie))
            rev_bg = dgl.bipartite((rcol, rrow), 'movie', 'rev-%s' % str(rating), 'user',
                               card=(self._num_movie, self._num_user))
            rating_graphs.append(bg)
            rating_graphs.append(rev_bg)
        graph = dgl.hetero_from_relations(rating_graphs)

        # sanity check
        assert len(rating_pairs[0]) == sum([graph.number_of_edges(et) for et in graph.etypes]) // 2

        if add_support:
            def _calc_norm(x):
                x = x.numpy().astype('float32')
                x[x == 0.] = np.inf
                x = th.FloatTensor(1. / np.sqrt(x))
                return x.to(self._device).unsqueeze(1)
            user_ci = []
            user_cj = []
            movie_ci = []
            movie_cj = []
            for r in self.possible_rating_values:
                r = str(r)
                user_ci.append(graph['rev-%s' % r].in_degrees())
                movie_ci.append(graph[r].in_degrees())
                if self._symm:
                    user_cj.append(graph[r].out_degrees())
                    movie_cj.append(graph['rev-%s' % r].out_degrees())
                else:
                    user_cj.append(th.zeros((self.num_user,)))
                    movie_cj.append(th.zeros((self.num_movie,)))
            user_ci = _calc_norm(sum(user_ci))
            movie_ci = _calc_norm(sum(movie_ci))
            if self._symm:
                user_cj = _calc_norm(sum(user_cj))
                movie_cj = _calc_norm(sum(movie_cj))
            else:
                user_cj = th.ones(self.num_user,).to(self._device)
                movie_cj = th.ones(self.num_movie,).to(self._device)
            graph.nodes['user'].data.update({'ci' : user_ci, 'cj' : user_cj})
            graph.nodes['movie'].data.update({'ci' : movie_ci, 'cj' : movie_cj})

        return graph
Example #15
def test_batching_with_zero_nodes_edges(index_dtype):
    """Test the features of batched DGLHeteroGraphs"""
    g1 = dgl.heterograph({
        ('user', 'follows', 'user'): [(0, 1), (1, 2)],
        ('user', 'plays', 'game'): []
    }, index_dtype=index_dtype)
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])

    g2 = dgl.heterograph({
        ('user', 'follows', 'user'): [(0, 1), (1, 2)],
        ('user', 'plays', 'game'): [(0, 0), (1, 0)]
    }, index_dtype=index_dtype)
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    bg = dgl.batch_hetero([g1, g2])

    assert F.allclose(bg.nodes['user'].data['h1'],
                      F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']], dim=0))
    assert F.allclose(bg.nodes['user'].data['h2'],
                      F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0))
    assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
    assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
    assert F.allclose(bg.edges['follows'].data['h1'],
                      F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0))
    assert F.allclose(bg.edges['plays'].data['h1'], g2.edges['plays'].data['h1'])

    # Test unbatching graphs
    g3, g4 = dgl.unbatch_hetero(bg)
    check_equivalence_between_heterographs(
        g1, g3,
        node_attrs={'user': ['h1', 'h2'], 'game': ['h1', 'h2']},
        edge_attrs={('user', 'follows', 'user'): ['h1']})
    check_equivalence_between_heterographs(
        g2, g4,
        node_attrs={'user': ['h1', 'h2'], 'game': ['h1', 'h2']},
        edge_attrs={('user', 'follows', 'user'): ['h1']})

    # Test graphs without edges
    g1 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(0, 4))
    g2 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(1, 5))
    g2.nodes['u'].data['x'] = F.tensor([1])
    dgl.batch_hetero([g1, g2])
Example #16
def test_sage_conv(aggre_type):
    ctx = F.ctx()
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.1))
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    dst_dim = 5 if aggre_type != 'gcn' else 10
    sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
    feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 200

    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = th.unique(g.edges()[1])
    block = dgl.to_block(g, seed_nodes)
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((block.number_of_src_nodes(), 5))
    sage = sage.to(ctx)
    h = sage(block, feat)
    assert h.shape[0] == block.number_of_dst_nodes()
    assert h.shape[-1] == 10

    # Test the case for graphs without edges
    g = dgl.bipartite([], num_nodes=(5, 3))
    sage = nn.SAGEConv((3, 3), 2, 'gcn')
    feat = (F.randn((5, 3)), F.randn((3, 3)))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 3
    for aggre_type in ['mean', 'pool', 'lstm']:
        sage = nn.SAGEConv((3, 1), 2, aggre_type)
        feat = (F.randn((5, 3)), F.randn((3, 1)))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 2
        assert h.shape[0] == 3
Example #17
    def generate_dec_graph(self, rating_users, rating_items):

        u_id = []
        i_id = []
        r = []

        for i in self.rating_values:
            u_id += rating_users[i]
            i_id += rating_items[i]
            r += [i for j in range(len(rating_users[i]))]

        # print(u_id[0], i_id[0])
        r = torch.Tensor(r)
        # r = torch.IntTensor(r)

        ones = np.ones_like(u_id)
        user_item_ratings_coo = sp.coo_matrix((ones, (u_id, i_id)),
                                              shape=(self.n_user, self.n_item),
                                              dtype=np.float32)

        G = dgl.bipartite(user_item_ratings_coo, 'user', 'rate', 'item')
        G.edata['label'] = r

        # print(G.find_edges(0))
        return G
Example #18
 def _generate_dec_graph(self, rating_pairs):
     ones = np.ones_like(rating_pairs[0])
     user_movie_ratings_coo = sp.coo_matrix(
         (ones, rating_pairs),
         shape=(self.num_user, self.num_movie),
         dtype=np.float32)
     return dgl.bipartite(user_movie_ratings_coo, 'user', 'rate', 'movie')
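A standalone sketch of the same pattern outside the class (toy sizes and edge lists assumed, legacy DGL 0.4 dgl.bipartite):

import numpy as np
import scipy.sparse as sp
import dgl

num_user, num_movie = 3, 4
rating_pairs = (np.array([0, 0, 1, 2]), np.array([1, 2, 0, 3]))  # (user ids, movie ids)
ones = np.ones_like(rating_pairs[0])
coo = sp.coo_matrix((ones, rating_pairs), shape=(num_user, num_movie), dtype=np.float32)
dec_graph = dgl.bipartite(coo, 'user', 'rate', 'movie')
print(dec_graph.number_of_edges())  # 4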
Example #19
def test_create():
    g0 = create_test_heterograph()
    g1 = create_test_heterograph1()
    g2 = create_test_heterograph2()
    assert set(g0.ntypes) == set(g1.ntypes) == set(g2.ntypes)
    assert set(g0.canonical_etypes) == set(g1.canonical_etypes) == set(
        g2.canonical_etypes)

    # create from nx complete bipartite graph
    nxg = nx.complete_bipartite_graph(3, 4)
    g = dgl.bipartite(nxg, 'user', 'plays', 'game')
    assert g.ntypes == ['user', 'game']
    assert g.etypes == ['plays']
    assert g.number_of_edges() == 12

    # create from scipy
    spmat = ssp.coo_matrix(([1, 1, 1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
    g = dgl.graph(spmat)
    assert g.number_of_nodes() == 4
    assert g.number_of_edges() == 3

    # test inferring number of nodes for heterograph
    g = dgl.heterograph({
        ('l0', 'e0', 'l1'): [(0, 1), (0, 2)],
        ('l0', 'e1', 'l2'): [(2, 2)],
        ('l2', 'e2', 'l2'): [(1, 1), (3, 3)],
    })
    assert g.number_of_nodes('l0') == 3
    assert g.number_of_nodes('l1') == 3
    assert g.number_of_nodes('l2') == 4
Example #20
def test_gmm_conv():
    ctx = F.ctx()
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    gmmconv = nn.GMMConv(5, 10, 3, 4, 'mean')
    feat = F.randn((100, 5))
    pseudo = F.randn((g.number_of_edges(), 3))
    gmmconv = gmmconv.to(ctx)
    h = gmmconv(g, feat, pseudo)
    # currently we only do shape check
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    gmmconv = nn.GMMConv(5, 10, 3, 4, 'mean')
    feat = F.randn((100, 5))
    pseudo = F.randn((g.number_of_edges(), 3))
    gmmconv = gmmconv.to(ctx)
    h = gmmconv(g, feat, pseudo)
    # currently we only do shape check
    assert h.shape[-1] == 10

    g = dgl.bipartite(sp.sparse.random(100, 50, density=0.1), readonly=True)
    gmmconv = nn.GMMConv((5, 2), 10, 3, 4, 'mean')
    feat = F.randn((100, 5))
    feat_dst = F.randn((50, 2))
    pseudo = F.randn((g.number_of_edges(), 3))
    gmmconv = gmmconv.to(ctx)
    h = gmmconv(g, (feat, feat_dst), pseudo)
    # currently we only do shape check
    assert h.shape[-1] == 10
Example #21
def test_nn_conv():
    ctx = F.ctx()
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv(5, 10, edge_func, 'mean')
    feat = F.randn((100, 5))
    efeat = F.randn((g.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(g, feat, efeat)
    # currently we only do shape check
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.1))
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv(5, 10, edge_func, 'mean')
    feat = F.randn((100, 5))
    efeat = F.randn((g.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(g, feat, efeat)
    # currently we only do shape check
    assert h.shape[-1] == 10

    g = dgl.bipartite(sp.sparse.random(50, 100, density=0.1))
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv((5, 2), 10, edge_func, 'mean')
    feat = F.randn((50, 5))
    feat_dst = F.randn((100, 2))
    efeat = F.randn((g.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(g, (feat, feat_dst), efeat)
    # currently we only do shape check
    assert h.shape[-1] == 10
Example #22
def test_gin_conv():
    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    ctx = F.ctx()

    gin_conv = nn.GINConv(lambda x: x, 'mean', 0.1)
    gin_conv.initialize(ctx=ctx)
    print(gin_conv)

    # test #1: basic
    feat = F.randn((g.number_of_nodes(), 5))
    h = gin_conv(g, feat)
    assert h.shape == (20, 5)

    # test #2: bipartite
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    feat = (F.randn((100, 5)), F.randn((200, 5)))
    h = gin_conv(g, feat)
    assert h.shape == (200, 5)

    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = np.unique(g.edges()[1].asnumpy())
    block = dgl.to_block(g, seed_nodes)
    feat = F.randn((block.number_of_src_nodes(), 5))
    h = gin_conv(block, feat)
    assert h.shape == (block.number_of_dst_nodes(), 5)
Example #23
def test_metapath_random_walk():
    g1 = dgl.bipartite(([0, 1, 2, 3], [0, 1, 2, 3]), 'a', 'ab', 'b')
    g2 = dgl.bipartite(([0, 0, 1, 1, 2, 2, 3, 3], [1, 3, 2, 0, 3, 1, 0, 2]),
                       'b', 'ba', 'a')
    G = dgl.hetero_from_relations([g1, g2])
    seeds = [0, 1]
    traces = dgl.contrib.sampling.metapath_random_walk(G, ['ab', 'ba'] * 4,
                                                       seeds, 3)
    for seed, traces_per_seed in zip(seeds, traces):
        assert len(traces_per_seed) == 3
        for trace in traces_per_seed:
            assert len(trace) == 8
            trace = np.insert(F.asnumpy(trace), 0, seed)
            for i in range(4):
                assert g1.has_edge_between(trace[2 * i], trace[2 * i + 1])
                assert g2.has_edge_between(trace[2 * i + 1], trace[2 * i + 2])
Example #24
def test_nn_conv():
    ctx = F.ctx()

    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    nn_conv = nn.NNConv(5, 2, gluon.nn.Embedding(3, 5 * 2), 'max')
    nn_conv.initialize(ctx=ctx)
    # test #1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    etypes = nd.random.randint(0, 4, g.number_of_edges()).as_in_context(ctx)
    h1 = nn_conv(g, h0, etypes)
    assert h1.shape == (g.number_of_nodes(), 2)

    g = dgl.graph(nx.erdos_renyi_graph(20, 0.3))
    nn_conv = nn.NNConv(5, 2, gluon.nn.Embedding(3, 5 * 2), 'max')
    nn_conv.initialize(ctx=ctx)
    # test #1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    etypes = nd.random.randint(0, 4, g.number_of_edges()).as_in_context(ctx)
    h1 = nn_conv(g, h0, etypes)
    assert h1.shape == (g.number_of_nodes(), 2)

    g = dgl.bipartite(sp.sparse.random(20, 10, 0.3))
    nn_conv = nn.NNConv((5, 4), 2, gluon.nn.Embedding(3, 5 * 2), 'max')
    nn_conv.initialize(ctx=ctx)
    # test #1: basic
    h0 = F.randn((g.number_of_src_nodes(), 5))
    hd = F.randn((g.number_of_dst_nodes(), 4))
    etypes = nd.random.randint(0, 4, g.number_of_edges()).as_in_context(ctx)
    h1 = nn_conv(g, (h0, hd), etypes)
    assert h1.shape == (g.number_of_dst_nodes(), 2)
Example #25
def test_gat_conv():
    ctx = F.ctx()

    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    gat = nn.GATConv(10, 20, 5) # n_heads = 5
    gat.initialize(ctx=ctx)
    print(gat)

    # test#1: basic
    feat = F.randn((20, 10))
    h = gat(g, feat)
    assert h.shape == (20, 5, 20)

    # test#2: bipartite
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    gat = nn.GATConv((5, 10), 2, 4)
    gat.initialize(ctx=ctx)
    feat = (F.randn((100, 5)), F.randn((200, 10)))
    h = gat(g, feat)
    assert h.shape == (200, 4, 2)

    # test#3: block
    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = np.unique(g.edges()[1].asnumpy())
    block = dgl.to_block(g, seed_nodes)
    gat = nn.GATConv(5, 2, 4)
    gat.initialize(ctx=ctx)
    feat = F.randn((block.number_of_src_nodes(), 5))
    h = gat(block, feat)
    assert h.shape == (block.number_of_dst_nodes(), 4, 2)
Example #26
def test_gmm_conv():
    ctx = F.ctx()

    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    gmm_conv = nn.GMMConv(5, 2, 5, 3, 'max')
    gmm_conv.initialize(ctx=ctx)
    # test #1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    pseudo = F.randn((g.number_of_edges(), 5))
    h1 = gmm_conv(g, h0, pseudo)
    assert h1.shape == (g.number_of_nodes(), 2)

    g = dgl.graph(nx.erdos_renyi_graph(20, 0.3))
    gmm_conv = nn.GMMConv(5, 2, 5, 3, 'max')
    gmm_conv.initialize(ctx=ctx)
    # test #1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    pseudo = F.randn((g.number_of_edges(), 5))
    h1 = gmm_conv(g, h0, pseudo)
    assert h1.shape == (g.number_of_nodes(), 2)

    g = dgl.bipartite(sp.sparse.random(20, 10, 0.1))
    gmm_conv = nn.GMMConv((5, 4), 2, 5, 3, 'max')
    gmm_conv.initialize(ctx=ctx)
    # test #1: basic
    h0 = F.randn((g.number_of_src_nodes(), 5))
    hd = F.randn((g.number_of_dst_nodes(), 4))
    pseudo = F.randn((g.number_of_edges(), 5))
    h1 = gmm_conv(g, (h0, hd), pseudo)
    assert h1.shape == (g.number_of_dst_nodes(), 2)
Example #27
def test_sage_conv():
    for aggre_type in ['mean', 'pool', 'gcn', 'lstm']:
        ctx = F.ctx()
        g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
        sage = nn.SAGEConv(5, 10, aggre_type)
        feat = F.randn((100, 5))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 10

        g = dgl.graph(sp.sparse.random(100, 100, density=0.1))
        sage = nn.SAGEConv(5, 10, aggre_type)
        feat = F.randn((100, 5))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 10

        g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
        dst_dim = 5 if aggre_type != 'gcn' else 10
        sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
        feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 2
        assert h.shape[0] == 200
Example #28
def create_test_heterograph(idtype):
    # test heterograph from the docstring, plus a user -- wishes -- game relation
    # 3 users, 2 games, 2 developers
    # metagraph:
    #    ('user', 'follows', 'user'),
    #    ('user', 'plays', 'game'),
    #    ('user', 'wishes', 'game'),
    #    ('developer', 'develops', 'game')])

    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)],
                          'user',
                          'follows',
                          idtype=idtype,
                          device=F.ctx())
    plays_g = dgl.bipartite(plays_spmat,
                            'user',
                            'plays',
                            'game',
                            idtype=idtype,
                            device=F.ctx())
    wishes_g = dgl.bipartite(wishes_nx,
                             'user',
                             'wishes',
                             'game',
                             idtype=idtype,
                             device=F.ctx())
    develops_g = dgl.bipartite([(0, 0), (1, 1)],
                               'developer',
                               'develops',
                               'game',
                               idtype=idtype,
                               device=F.ctx())
    assert follows_g.idtype == idtype
    assert plays_g.idtype == idtype
    assert wishes_g.idtype == idtype
    assert develops_g.idtype == idtype
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])
    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
Example #29
def test_create():
    g0 = create_test_heterograph()
    g1 = create_test_heterograph1()
    g2 = create_test_heterograph2()
    assert set(g0.ntypes) == set(g1.ntypes) == set(g2.ntypes)
    assert set(g0.canonical_etypes) == set(g1.canonical_etypes) == set(g2.canonical_etypes)

    # create from nx complete bipartite graph
    nxg = nx.complete_bipartite_graph(3, 4)
    g = dgl.bipartite(nxg, 'user', 'plays', 'game')
    assert g.ntypes == ['user', 'game']
    assert g.etypes == ['plays']
    assert g.number_of_edges() == 12

    # create from scipy
    spmat = ssp.coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
    g = dgl.graph(spmat)
    assert g.number_of_nodes() == 4
    assert g.number_of_edges() == 3

    # test inferring number of nodes for heterograph
    g = dgl.heterograph({
        ('l0', 'e0', 'l1'): [(0, 1), (0, 2)],
        ('l0', 'e1', 'l2'): [(2, 2)],
        ('l2', 'e2', 'l2'): [(1, 1), (3, 3)],
        })
    assert g.number_of_nodes('l0') == 3
    assert g.number_of_nodes('l1') == 3
    assert g.number_of_nodes('l2') == 4

    # test if validate flag works
    # homogeneous graph
    fail = False
    try:
        g = dgl.graph(
            ([0, 0, 0, 1, 1, 2], [0, 1, 2, 0, 1, 2]),
            card=2,
            validate=True
        )
    except DGLError:
        fail = True
    finally:
        assert fail, "should catch a DGLError because node ID is out of bound."
    # bipartite graph
    def _test_validate_bipartite(card):
        fail = False
        try:
            g = dgl.bipartite(
                ([0, 0, 1, 1, 2], [1, 1, 2, 2, 3]),
                card=card,
                validate=True
            )
        except DGLError:
            fail = True
        finally:
            assert fail, "should catch a DGLError because node ID is out of bound."

    _test_validate_bipartite((3, 3))
    _test_validate_bipartite((2, 4))
Example #30
def test_pickling_heterograph_index_compatibility():
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    plays_g = dgl.bipartite(plays_spmat, 'user', 'plays', 'game')
    wishes_g = dgl.bipartite(wishes_nx, 'user', 'wishes', 'game')
    develops_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])

    with open("tests/compute/hetero_pickle_old.pkl", "rb") as f:
        gi = pickle.load(f)
    new_g = dgl.DGLHeteroGraph(gi, g.ntypes, g.etypes)
    _assert_is_identical_hetero(g, new_g)