Beispiel #1
0
def generate_rand_graph(n, connect_more=False, complete=False, add_self_loop=False):
    """Build a read-only ``dgl.DGLGraphStale`` on an ``n`` x ``n`` adjacency.

    Parameters
    ----------
    n
        Side length of the square adjacency matrix.
    connect_more
        Only used when ``complete`` is false: row 0 and column 0 of the
        random adjacency matrix are set to 1.
    complete
        When true, the adjacency holds an entry for every ordered pair
        ``(i, j)`` with ``i != j``; otherwise a sparse random matrix with
        density 0.1 is used.
    add_self_loop
        When true, an edge ``(v, v)`` is added for every node before the
        graph is switched to read-only.

    Returns
    -------
    The graph, with node data ``'h1'`` (10 columns) and edge data ``'h2'``
    (3 columns) filled from ``F.randn``.
    """
    if complete:
        # Collect every ordered (src, dst) pair except the diagonal.
        row, col = [], []
        for src, dst in itertools.product(range(n), repeat=2):
            if src != dst:
                row.append(src)
                col.append(dst)
        weights = np.ones((len(row),))
        adj = sp.sparse.coo_matrix((weights, (row, col)), shape=(n, n))
    else:
        random_mat = sp.sparse.random(n, n, density=0.1, format='coo')
        adj = (random_mat != 0).astype(np.int64)
        if connect_more:
            # Have one node (node 0) connect to all other nodes.
            adj[0] = 1
            adj[:, 0] = 1

    # The graph must stay mutable while self loops are being added.
    g = dgl.DGLGraphStale(adj, readonly=not add_self_loop)
    if add_self_loop:
        node_ids = np.arange(g.number_of_nodes())
        g.add_edges(node_ids, node_ids)
        g.readonly()

    g.ndata['h1'] = F.randn((g.number_of_nodes(), 10))
    g.edata['h2'] = F.randn((g.number_of_edges(), 3))
    return g
Beispiel #2
0
def test_random_walk_with_restart():
    """Check the random-walk-with-restart samplers on a 5-node path graph.

    The graph has edges in both directions between consecutive node IDs
    0..4, so every step of a walk changes the node ID by exactly one.
    """
    forward = [(0, 1), (1, 2), (2, 3), (3, 4)]
    backward = [(4, 3), (3, 2), (2, 1), (1, 0)]
    edge_list = forward + backward
    seeds = [0, 1]
    max_nodes = 10

    g = dgl.DGLGraphStale(edge_list)
    sampling = dgl.contrib.sampling

    # Plain RWR: one entry per seed, every step moves to an adjacent ID,
    # and at least ``max_nodes`` nodes are collected per seed.
    traces = sampling.random_walk_with_restart(g, seeds, 0.2, max_nodes)
    assert len(traces) == len(seeds)
    for seed_traces in traces:
        visited = 0
        for walk in seed_traces:
            visited += len(walk)
            steps = np.diff(F.zerocopy_to_numpy(walk), axis=-1)
            assert (np.abs(steps) == 1).all()
        assert visited >= max_nodes

    # RWR with early stopping: fewer than 100 nodes in total per seed.
    traces = sampling.random_walk_with_restart(g, seeds, 1, 100, max_nodes, 1)
    assert len(traces) == len(seeds)
    for seed_traces in traces:
        collected = sum(len(walk) for walk in seed_traces)
        assert collected < 100

    # Bipartite RWR: consecutive entries of a trace differ by an even number.
    traces = sampling.bipartite_single_sided_random_walk_with_restart(
        g, seeds, 0.2, max_nodes)
    assert len(traces) == len(seeds)
    for seed_traces in traces:
        for walk in seed_traces:
            steps = np.diff(F.zerocopy_to_numpy(walk), axis=-1)
            assert (steps % 2 == 0).all()
Beispiel #3
0
def test_nonuniform_neighbor_sampler():
    """Check that ``NeighborSampler`` honours the ``transition_prob`` weights.

    The graph consists of
    (1) a path (0, 1, ..., 99) whose edges have weight 1, and
    (2) a bunch of random edges with weight 0.

    Sampling one neighbour per node along the weighted edges must therefore
    walk the path: each of the 100 NodeFlow layers holds exactly one node.
    """
    num_nodes = 100
    num_path_edges = num_nodes - 1
    last_node = num_nodes - 1

    # ``edges`` keeps insertion order (the weights below rely on the path
    # edges coming first); ``seen`` gives O(1) duplicate checks instead of
    # scanning the growing list on every iteration.
    edges = [(i, i + 1) for i in range(num_path_edges)]
    seen = set(edges)
    for _ in range(1000):
        edge = (np.random.randint(num_nodes), np.random.randint(num_nodes))
        if edge not in seen:
            seen.add(edge)
            edges.append(edge)

    src, dst = zip(*edges)
    g = dgl.DGLGraphStale()
    g.add_nodes(num_nodes)
    g.add_edges(src, dst)
    g.readonly()

    # Weight 1 for the path edges, weight 0 for every random edge.
    g.edata['w'] = F.cat([
        F.ones((num_path_edges, ), F.float64, F.cpu()),
        F.zeros((len(edges) - num_path_edges, ), F.float64, F.cpu())
    ], 0)

    # Each case: (neighbor direction, seed node, expected parent node ID of
    # the single node on layer i).
    #  * 'in' from node 99: the NodeFlow should only contain node i on layer i.
    #  * 'out' from node 0 (the reverse direction): node 99 - i on layer i.
    cases = [
        ('in', last_node, list(range(num_nodes))),
        ('out', 0, list(range(last_node, -1, -1))),
    ]
    for direction, seed, expected_nids in cases:
        sampler = dgl.contrib.sampling.NeighborSampler(g,
                                                       1,
                                                       1,
                                                       num_path_edges,
                                                       direction,
                                                       transition_prob='w',
                                                       seed_nodes=[seed])
        nf = next(iter(sampler))

        assert nf.num_layers == num_nodes
        for i in range(nf.num_layers):
            assert nf.layer_size(i) == 1
            assert F.asnumpy(nf.layer_parent_nid(i)[0]) == expected_nids[i]
Beispiel #4
0
def test_random_walk():
    """Check shape, start nodes and step size of ``random_walk`` traces.

    The graph is a 5-node path with edges in both directions between
    consecutive node IDs.
    """
    forward = [(0, 1), (1, 2), (2, 3), (3, 4)]
    backward = [(4, 3), (3, 2), (2, 1), (1, 0)]
    edge_list = forward + backward
    seeds = [0, 1]
    n_traces = 3
    n_hops = 4

    g = dgl.DGLGraphStale(edge_list, readonly=True)
    walks = dgl.contrib.sampling.random_walk(g, seeds, n_traces, n_hops)
    traces = F.zerocopy_to_numpy(walks)

    # One block per seed, ``n_traces`` walks each, start node plus one per hop.
    expected_shape = (len(seeds), n_traces, n_hops + 1)
    assert traces.shape == expected_shape

    # Every walk begins at its own seed.
    for seed, seed_traces in zip(seeds, traces):
        assert (seed_traces[:, 0] == seed).all()

    # Only nodes with adjacent IDs are connected, so each hop moves by one.
    hop_sizes = np.abs(np.diff(traces, axis=-1))
    assert (hop_sizes == 1).all()
# The Cora dataset has 2708 nodes; 1208 of them are used as the training set
# and 1000 of them are used as the test set.
data = citegrh.load_cora()
adj = nx.adjacency_matrix(data.graph)
# Reorder: apply one random permutation of the node ids to the rows and
# columns of the adjacency matrix and to the features and labels, so that
# all of them stay aligned with each other.
n_nodes = 2708
ids_shuffle = np.arange(n_nodes)
np.random.shuffle(ids_shuffle)
adj = adj[ids_shuffle, :][:, ids_shuffle]
data.features = data.features[ids_shuffle]
data.labels = data.labels[ids_shuffle]
# Train/test split: the first 1208 shuffled ids are the training nodes and
# ids 1708..2707 are the test nodes; ids 1208..1707 belong to neither split.
train_nodes = np.arange(1208)
test_nodes = np.arange(1708, 2708)
# Adjacency restricted to the rows and columns of each split.
train_adj = adj[train_nodes, :][:, train_nodes]
test_adj = adj[test_nodes, :][:, test_nodes]
# One graph over the training nodes only and one over all nodes.
trainG = dgl.DGLGraphStale(train_adj)
allG = dgl.DGLGraphStale(adj)
# float32 feature tensors for the training nodes, the test nodes and all nodes.
h = torch.tensor(data.features[train_nodes], dtype=torch.float32)
test_h = torch.tensor(data.features[test_nodes], dtype=torch.float32)
all_h = torch.tensor(data.features, dtype=torch.float32)
# NOTE: train_nodes / test_nodes are rebound from numpy arrays to tensors
# here, so the label lookups below index data.labels with tensors.
train_nodes = torch.tensor(train_nodes)
test_nodes = torch.tensor(test_nodes)
y_train = torch.tensor(data.labels[train_nodes])
y_test = torch.tensor(data.labels[test_nodes])
# Sizes read from the data: feature columns in, number of labels out.
input_size = h.shape[1]
output_size = data.num_labels

##configuration
config = {
    'n_epoch': 300,
    'lamb': 0.5,
Beispiel #6
0
def generate_rand_graph(n):
    """Return a read-only ``dgl.DGLGraphStale`` from a random adjacency.

    The adjacency is an ``n`` x ``n`` sparse random matrix with density 0.1
    whose non-zero pattern is converted to ``np.int64`` entries.
    """
    sparse_rand = sp.sparse.random(n, n, density=0.1, format='coo')
    adjacency = (sparse_rand != 0).astype(np.int64)
    return dgl.DGLGraphStale(adjacency, readonly=True)