def test_identity_unweighted_weighted_1_walks(self):
    """Weighted and unweighted biased walks must agree when every weight is 1.

    Both runs use the same seed and parameters, so the only difference is
    the ``weighted`` flag.
    """
    # 4-cycle in which every edge carries weight 1
    unit_weight_edges = [(1, 2, 1), (2, 3, 1), (3, 4, 1), (4, 1, 1)]
    nx_graph = nx.Graph()
    nx_graph.add_weighted_edges_from(unit_weight_edges)
    graph = StellarGraph(nx_graph)

    # arguments shared by both runs
    shared = dict(nodes=graph.nodes(), n=4, p=1.0, q=1.0, length=4, seed=42)

    walker = BiasedRandomWalk(graph)
    weighted_walks = walker.run(weighted=True, **shared)
    unweighted_walks = walker.run(weighted=False, **shared)
    assert weighted_walks == unweighted_walks
def test_UnsupervisedSampler_parameter(self):
    """Check argument validation, default root nodes and seeding of ``UnsupervisedSampler``."""
    g = create_test_graph()
    # rw = UniformRandomWalk(StellarGraph(g))

    # if no graph is provided
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=None)

    # graph has to be a StellarGraph object
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g)

    g = StellarGraph(g)

    # Disabled checks, kept for reference (previously held in a bare string literal):
    #
    # # only Uniform random walk is supported at the moment
    # with pytest.raises(TypeError):
    #     UnsupervisedSampler(G=g, walker="any random walker")
    #
    # # if no walker is provided, default to Uniform Random Walk
    # sampler = UnsupervisedSampler(G=g)
    # assert isinstance(sampler.walker, UniformRandomWalk)

    # walk must have length strictly greater than 1
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g, length=1)

    # at least 1 walk from each root node
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g, number_of_walks=0)

    # the nodes parameter should be an iterable of node IDs
    with pytest.raises(ValueError):
        UnsupervisedSampler(G=g, nodes=1)

    # if no root nodes are provided for sampling, default to using all nodes as root nodes
    sampler = UnsupervisedSampler(G=g, nodes=None)
    assert sampler.nodes == list(g.nodes())

    # if the seed value is provided, check that the random choices are reproducible
    sampler = UnsupervisedSampler(G=g, seed=1)
    assert sampler.random.choices(range(100), k=10) == [
        13,
        84,
        76,
        25,
        49,
        44,
        65,
        78,
        9,
        2,
    ]
def example_graph_1(feature_size=None):
    """Build a small 4-node example graph and wrap it in a StellarGraph.

    All nodes and edges carry the label ``"default"``.

    Args:
        feature_size: If not None, every node gets a ``"feature"`` attribute
            equal to ``np.ones(feature_size)`` and the StellarGraph is built
            with ``node_features="feature"``.

    Returns:
        A StellarGraph wrapping the networkx graph.
    """
    G = nx.Graph()
    # (2, 3) and (3, 2) name the same edge in an undirected nx.Graph
    elist = [(1, 2), (2, 3), (1, 4), (3, 2)]
    G.add_nodes_from([1, 2, 3, 4], label="default")
    G.add_edges_from(elist, label="default")

    if feature_size is None:
        return StellarGraph(G)

    # Add example features
    for v in G.nodes():
        # ``G.nodes[v]`` replaces the ``G.node[v]`` accessor removed in NetworkX 2.4
        G.nodes[v]["feature"] = np.ones(feature_size)
    return StellarGraph(G, node_features="feature")
def weighted(a, b, c, d):
    """Return the 4-cycle 1-2-3-4-1 as a StellarGraph with edge weights a, b, c, d.

    The weights are assigned to the edges (1, 2), (2, 3), (3, 4) and (4, 1)
    in that order.
    """
    cycle = [(1, 2), (2, 3), (3, 4), (4, 1)]
    rows = [(src, dst, w) for (src, dst), w in zip(cycle, (a, b, c, d))]
    edge_frame = pd.DataFrame(rows, columns=["source", "target", "weight"])
    node_frame = pd.DataFrame(index=[1, 2, 3, 4])
    return StellarGraph(node_frame, edge_frame)
def temporal_graph():
    """Return a 6-node, 5-edge StellarGraph with a numeric ``weight`` edge column.

    NOTE(review): given the function name, the ``weight`` values presumably
    act as edge times -- confirm against the callers.
    """
    node_frame = pd.DataFrame(index=[1, 2, 3, 4, 5, 6])
    edge_frame = pd.DataFrame(
        [(1, 2, 5), (2, 3, 2), (2, 4, 10), (4, 5, 3), (4, 6, 12)],
        columns=["source", "target", "weight"],
    )
    return StellarGraph(nodes=node_frame, edges=edge_frame)
def test_GCN_regularisers():
    """Check which kernel/bias initialiser arguments ``GCN`` accepts.

    Despite the test's name, the arguments exercised here are
    ``kernel_initializer`` and ``bias_initializer``: known names and
    initialiser objects must be accepted, unknown names must raise
    ``ValueError``.
    """
    G, features = create_graph_features()
    node_features = pd.DataFrame.from_dict(
        dict(zip(G.nodes(), features)), orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="none")

    # defaults
    GCN([2], generator)

    # kernel initialiser given by name and as an object
    GCN([2], generator, kernel_initializer="ones")
    GCN([2], generator, kernel_initializer=initializers.ones())
    with pytest.raises(ValueError):
        GCN([2], generator, kernel_initializer="fred")

    # bias initialiser given by name and as an object
    GCN([2], generator, bias_initializer="zeros")
    GCN([2], generator, bias_initializer=initializers.zeros())
    with pytest.raises(ValueError):
        GCN([2], generator, bias_initializer="barney")
def test_GCN_activations():
    """Check default and explicit ``activations`` handling in ``GCN``.

    The number of activations must match the number of layers and every
    activation name must be known, otherwise ``ValueError`` is expected.
    """
    G, features = create_graph_features()
    node_features = pd.DataFrame.from_dict(
        dict(zip(G.nodes(), features)), orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="none")

    # one "relu" per layer by default
    gcn = GCN([2], generator)
    assert gcn.activations == ["relu"]

    gcn = GCN([2, 2], generator)
    assert gcn.activations == ["relu", "relu"]

    gcn = GCN([2], generator, activations=["linear"])
    assert gcn.activations == ["linear"]

    with pytest.raises(ValueError):
        # More activations than layers
        GCN([2], generator, activations=["relu", "linear"])

    with pytest.raises(ValueError):
        # Fewer activations than layers
        GCN([2, 2], generator, activations=["relu"])

    with pytest.raises(ValueError):
        # Unknown activation
        GCN([2], generator, activations=["bleach"])
def test_APPNP_apply_propagate_model_dense():
    """Propagate a small dense Keras model through APPNP and compare predictions.

    Predictions made from explicitly built arrays must have shape (1, 2, 2)
    and match those obtained through the dense full-batch generator.
    """
    G, features = create_graph_features()
    adj = nx.to_scipy_sparse_matrix(G)
    features, adj = GCN_Aadj_feats_op(features, adj)
    # add a leading axis of size 1 to the dense adjacency
    adj = adj.todense()[None, :, :]

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="gcn")
    appnp_model = APPNP([2], ["relu"], generator=generator, dropout=0.5)

    fully_connected_model = keras.Sequential()
    fully_connected_model.add(Dense(2))

    x_in, x_out = appnp_model.propagate_model(fully_connected_model)
    model = keras.Model(inputs=x_in, outputs=x_out)

    # Predict directly from arrays
    out_indices = np.array([[0, 1]], dtype="int32")
    preds_1 = model.predict([features[None, :, :], out_indices, adj])
    assert preds_1.shape == (1, 2, 2)

    # Predict through the generator
    preds_2 = model.predict_generator(generator.flow(["a", "b"]))
    assert preds_2.shape == (1, 2, 2)

    assert preds_1 == pytest.approx(preds_2)
def test_APPNP_apply_sparse():
    """APPNP node model on a sparse generator: array and generator predictions agree.

    The adjacency is passed to the model as COO indices and values, each
    with a leading axis of size 1.
    """
    G, features = create_graph_features()
    features, adj = GCN_Aadj_feats_op(features, nx.to_scipy_sparse_matrix(G))

    coo = adj.tocoo()
    index_pairs = np.hstack((coo.row[:, None], coo.col[:, None]))
    A_indices = np.expand_dims(index_pairs, 0)
    A_values = np.expand_dims(coo.data, 0)

    node_features = pd.DataFrame.from_dict(
        dict(zip(G.nodes(), features)), orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=True, method="gcn")
    appnp = APPNP([2], ["relu"], generator=generator, dropout=0.5)

    x_in, x_out = appnp.node_model()
    model = keras.Model(inputs=x_in, outputs=x_out)

    expected_shape = (1, 2, 2)

    # predictions from explicitly constructed input arrays
    out_indices = np.array([[0, 1]], dtype="int32")
    array_inputs = [features[None, :, :], out_indices, A_indices, A_values]
    preds_1 = model.predict(array_inputs)
    assert preds_1.shape == expected_shape

    # predictions from the generator's flow over nodes "a" and "b"
    preds_2 = model.predict_generator(generator.flow(["a", "b"]))
    assert preds_2.shape == expected_shape

    assert preds_1 == pytest.approx(preds_2)
def test_GCN_apply_sparse():
    """GCN node model: array predictions must match generator predictions.

    NOTE(review): the name says "sparse", but the generator is created with
    ``sparse=False`` and a dense adjacency array is fed to the model --
    confirm whether ``sparse=True`` was intended.
    """
    G, features = create_graph_features()
    # dense adjacency with a leading axis of size 1
    adj = nx.to_numpy_array(G)[None, :, :]

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="none")
    gcn_model = GCN([2], ["relu"], generator=generator, dropout=0.5)

    x_in, x_out = gcn_model.node_model()
    model = keras.Model(inputs=x_in, outputs=x_out)

    # Predict directly from arrays
    out_indices = np.array([[0, 1]], dtype="int32")
    preds_1 = model.predict([features[None, :, :], out_indices, adj])
    assert preds_1.shape == (1, 2, 2)

    # Predict through the generator
    preds_2 = model.predict_generator(generator.flow(["a", "b"]))
    assert preds_2.shape == (1, 2, 2)

    assert preds_1 == pytest.approx(preds_2)
def test_PPNP_edge_cases():
    """Exercise the error paths of the PPNP generator and model construction.

    Expected failures:
      * ``FullBatchNodeGenerator`` with ``sparse=True`` and ``method="ppnp"``
        raises ``ValueError``;
      * ``PPNP`` with two layers but one activation raises ``ValueError``
        with a fixed message;
      * ``PPNP`` with a non-generator ``generator`` argument raises
        ``TypeError`` with a fixed message.

    ``pytest.raises`` is used so that a missing exception fails the test
    cleanly instead of leaving the captured error unbound or stale.
    """
    G, features = create_graph_features()
    adj = nx.to_scipy_sparse_matrix(G)
    features, adj = PPNP_Aadj_feats_op(features, adj)

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    # sparse generator with the "ppnp" method must be rejected
    with pytest.raises(ValueError):
        FullBatchNodeGenerator(G, sparse=True, method="ppnp")

    generator = FullBatchNodeGenerator(G, sparse=False, method="ppnp")

    # two layers, one activation
    with pytest.raises(ValueError) as excinfo:
        PPNP([2, 2], ["relu"], generator=generator, dropout=0.5)
    assert (
        str(excinfo.value)
        == "The number of layers should equal the number of activations"
    )

    # generator argument of the wrong type
    with pytest.raises(TypeError) as excinfo:
        PPNP([2], ["relu"], generator=[0, 1], dropout=0.5)
    assert (
        str(excinfo.value)
        == "Generator should be a instance of FullBatchNodeGenerator"
    )
def test_split_function_node_type():
    """``train_val_test_split`` restricted to ``node_type="movie"``.

    The split must raise ``TypeError`` for a graph that is not a
    StellarGraph, and on a StellarGraph every returned node must carry the
    label ``"movie"``.
    """
    # Example graph:
    g = create_example_graph_1()

    split_args = dict(
        node_type="movie",
        test_size=1,
        train_size=2,
        targets=None,
        split_equally=False,
        seed=None,
    )

    # This doesn't work if g is not a StellarGraph
    with pytest.raises(TypeError):
        train_val_test_split(g, **split_args)

    gs = StellarGraph(g)
    splits = train_val_test_split(gs, **split_args)

    # ``g.nodes[s]`` replaces the ``g.node[s]`` accessor removed in NetworkX 2.4
    assert all(g.nodes[s]["label"] == "movie" for split in splits for s in split)
def create_test_graph(self_loop=False, multi=False):
    """
    Creates a heterogeneous graph for testing the SampledHeterogeneousBreadthFirstWalk class.
    Node ids are a mix of strings and integers.

    :param self_loop: if True, add a "friend" self-loop (7, 7) with edge id 8;
        otherwise node 7 has no edges.
    :param multi: if True, add a "colleague" edge (1, 4) with edge id 123,
        repeating the "friend" edge between the same two nodes.
    :return: A StellarGraph with 5 "user" and 3 "movie" nodes and 8 to 10
        edges; node 0 never has any edge.
    """
    edge_columns = ["source", "target"]

    nodes = {
        "user": pd.DataFrame(index=[0, 1, "5", 4, 7]),
        "movie": pd.DataFrame(index=[2, 3, 6]),
    }

    rating_pairs = [(1, 2), (1, 3), ("5", 6), ("5", 3), (4, 2)]

    # friend edges get explicit ids; the ratings use the default 0..4 index
    friend_pairs = [("5", 4), (1, 4), (1, "5")]
    friend_ids = [5, 6, 7]
    if self_loop:
        friend_pairs = friend_pairs + [(7, 7)]
        friend_ids = friend_ids + [8]

    edges = {
        "rating": pd.DataFrame(rating_pairs, columns=edge_columns),
        "friend": pd.DataFrame(friend_pairs, columns=edge_columns, index=friend_ids),
    }

    if multi:
        edges["colleague"] = pd.DataFrame(
            [(1, 4)], columns=edge_columns, index=[123]
        )

    return StellarGraph(nodes, edges)
def test_weighted_graph_label(self):
    """Weighted biased walks with a matching and a mismatching edge weight label.

    With weights stored under ``"w"`` and ``edge_weight_label="w"`` one walk
    per node is returned; with weights stored under ``"wt"`` but the label
    still ``"w"``, the weighted run is expected to raise ``ValueError``.
    """
    cycle = [(1, 2), (2, 3), (3, 4), (4, 1)]

    def cycle_with_weights(attr):
        # weights 1..4 are stored on the cycle edges under ``attr``;
        # the StellarGraph is always told to look for "w"
        nx_graph = nx.Graph()
        nx_graph.add_edges_from(cycle)
        for weight, (u, v) in enumerate(cycle, start=1):
            nx_graph[u][v][attr] = weight
        return StellarGraph(nx_graph, edge_weight_label="w")

    run_args = dict(n=1, p=1.0, q=1.0, length=1, seed=None, weighted=True)

    graph = cycle_with_weights("w")
    roots = list(graph.nodes())
    walker = BiasedRandomWalk(graph)
    assert len(walker.run(nodes=roots, **run_args)) == 4

    # Deliberately use wrong name for edge weight!?
    graph = cycle_with_weights("wt")
    roots = list(graph.nodes())
    walker = BiasedRandomWalk(graph)
    with pytest.raises(ValueError):
        walker.run(nodes=roots, **run_args)
def create_stellargraph():
    """Return a StellarGraph with nodes "a".."d" (two features each) and four weighted edges."""
    feature_rows = [[1, 1], [1, 0], [0, 1], [0.5, 1]]
    node_frame = pd.DataFrame(feature_rows, index=["a", "b", "c", "d"])

    weighted_edges = [
        ("a", "b", 1.0),
        ("b", "c", 0.4),
        ("a", "c", 2.0),
        ("b", "d", 10.0),
    ]
    edge_frame = pd.DataFrame(weighted_edges, columns=["source", "target", "weight"])

    return StellarGraph(node_frame, edge_frame)
def create_stellargraph():
    """Wrap the graph from ``create_graph_features`` in a StellarGraph.

    Features are paired with nodes in ``Gnx.nodes()`` iteration order and
    every node is given the type name "node".
    """
    Gnx, features = create_graph_features()
    features_by_node = dict(zip(Gnx.nodes(), features))
    node_features = pd.DataFrame.from_dict(features_by_node, orient="index")
    return StellarGraph(Gnx, node_type_name="node", node_features=node_features)
def example_Graph_2(feature_size=None):
    """Build a 4-node, 4-edge example graph and wrap it in a StellarGraph.

    Args:
        feature_size: If not None, node ``v`` gets a ``"feature"`` attribute
            equal to ``int(v) * np.ones(feature_size)``.

    Returns:
        ``StellarGraph(G, node_features="feature")``.

    NOTE(review): the StellarGraph is assumed to be built whether or not
    features were added, so the function always returns a StellarGraph --
    confirm against the original layout.
    """
    G = nx.Graph()
    elist = [(1, 2), (2, 3), (1, 4), (4, 2)]
    G.add_edges_from(elist)

    # Add example features
    if feature_size is not None:
        for v in G.nodes():
            # ``G.nodes[v]`` replaces the ``G.node[v]`` accessor removed in NetworkX 2.4
            G.nodes[v]["feature"] = int(v) * np.ones(feature_size)

    G = StellarGraph(G, node_features="feature")
    return G
def create_test_graph():
    """
    Creates a simple graph for testing the BreadthFirstWalk class. The node ids are string or integers. Each node
    also has a label based on the type of its id such that nodes with string ids and those with integer ids have
    labels 's' and 'n' respectively.

    Returns:
        A simple graph with 13 nodes and 21 edges (including self loops for all but two of the nodes), built
        with networkx and wrapped in a StellarGraph.
    """
    g = nx.Graph()
    edges = [
        ("0", 1),
        ("0", 2),
        (1, 3),
        (1, 4),
        (3, 6),
        (4, "7"),
        (4, 8),
        (2, "5"),
        ("5", 9),
        ("5", 10),
        ("0", "0"),
        (1, 1),
        (3, 3),
        (6, 6),
        (4, 4),
        ("7", "7"),
        (8, 8),
        (2, 2),
        ("5", "5"),
        (9, 9),
        # node that is not connected with any other nodes but has self loop
        ("self loner", "self loner"),
    ]

    g.add_edges_from(edges)

    # node that is not connected to any other nodes and not having a self loop
    g.add_node("loner")

    for node in g.nodes():
        # string ids get label "s", all other (numeric) ids get label "n";
        # ``g.nodes[...]`` replaces the ``g.node[...]`` accessor removed in NetworkX 2.4
        g.nodes[node]["label"] = "s" if isinstance(node, str) else "n"

    g = StellarGraph(g)
    return g
def test_GCN_init():
    """A freshly built GCN exposes the layer sizes, activations and dropout it was given."""
    nx_graph, features = create_graph_features()
    features_by_node = dict(zip(nx_graph.nodes(), features))
    node_features = pd.DataFrame.from_dict(features_by_node, orient="index")
    graph = StellarGraph(nx_graph, node_type_name="node", node_features=node_features)

    generator = FullBatchNodeGenerator(graph)
    gcn_model = GCN([2], ["relu"], generator=generator, dropout=0.5)

    observed = (gcn_model.layer_sizes, gcn_model.activations, gcn_model.dropout)
    assert observed[0] == [2]
    assert observed[1] == ["relu"]
    assert observed[2] == 0.5
def test_weighted_all_zero(self):
    """Weighted breadth-first sampling from a node whose edges all have weight 0.

    Ten walks of full size are expected, each starting at node 0 and holding
    -1 in every other position.
    """
    zero_weight_edges = pd.DataFrame(
        {"source": [0, 0], "target": [1, 2], "weight": [0.0, 0]}
    )
    graph = StellarGraph(edges=zero_weight_edges)
    sampler = SampledBreadthFirstWalk(graph)

    walks = sampler.run(nodes=[0], n=10, n_size=[20, 20], weighted=True)

    # head node + 20 first-hop slots + 20 * 20 second-hop slots
    full_length = 1 + 20 + 20 * 20

    assert len(walks) == 10
    for sampled in walks:
        assert len(sampled) == full_length
        head, rest = sampled[0], sampled[1:]
        assert head == 0
        np.testing.assert_array_equal(rest, -1)
def create_test_graph():
    """
    Creates a simple graph for testing the BreadthFirstWalk class. Node ids are strings or integers, and the
    node type reflects the id's type: string ids are of type 's', integer ids of type 'n'.

    Returns:
        A StellarGraph with 13 nodes and 21 edges (self loops on all but two of the nodes).
    """
    string_ids = ["0", "5", "7", "self loner", "loner"]
    integer_ids = [1, 2, 3, 4, 6, 8, 9, 10]
    nodes = {
        "s": pd.DataFrame(index=string_ids),
        "n": pd.DataFrame(index=integer_ids),
    }

    links = [
        ("0", 1),
        ("0", 2),
        (1, 3),
        (1, 4),
        (3, 6),
        (4, "7"),
        (4, 8),
        (2, "5"),
        ("5", 9),
        ("5", 10),
    ]
    # every node below links back to itself; "self loner" has no other edge,
    # while 10 and "loner" get no self loop at all
    looped = ["0", 1, 3, 6, 4, "7", 8, 2, "5", 9, "self loner"]
    self_loops = [(node, node) for node in looped]

    edges = pd.DataFrame(links + self_loops, columns=["source", "target"])
    return StellarGraph(nodes, edges)
def test_GCN_apply():
    """Build a Keras model from the GCN node model and check the prediction shape is (3, 2)."""
    nx_graph, features = create_graph_features()
    adjacency = nx.adjacency_matrix(nx_graph)

    features_by_node = dict(zip(nx_graph.nodes(), features))
    node_features = pd.DataFrame.from_dict(features_by_node, orient="index")
    graph = StellarGraph(nx_graph, node_type_name="node", node_features=node_features)

    generator = FullBatchNodeGenerator(graph)
    gcn_model = GCN([2], ["relu"], generator=generator, dropout=0.5)

    inputs, outputs = gcn_model.node_model()
    model = keras.Model(inputs=inputs, outputs=outputs)

    # a single batch whose size is the number of adjacency rows
    n_rows = adjacency.shape[0]
    preds = model.predict([features, adjacency], batch_size=n_rows)
    assert preds.shape == (3, 2)
def create_multi_test_graph():
    """
    Creates a multi graph for testing the SampledHeterogeneousBreadthFirstWalk class. The node ids are string or
    integers.

    :return: A multi graph with 8 nodes ("user" and "movie") and 9 edges ("rating", "friend" and "colleague") in
        StellarGraph format. Nodes 0 and 7 have no edges, and nodes 1 and 4 are joined by both a "friend" and a
        "colleague" edge.
    """
    nodes_by_label = {
        "user": [0, 1, "5", 4, 7],
        "movie": [2, 3, 6],
    }
    edges_by_label = {
        "rating": [(1, 2), (1, 3), ("5", 6), ("5", 3), (4, 2)],
        "friend": [("5", 4), (1, 4), (1, "5")],
        "colleague": [(1, 4)],
    }

    multi = nx.MultiGraph()
    for label, ids in nodes_by_label.items():
        multi.add_nodes_from(ids, label=label)
    for label, pairs in edges_by_label.items():
        multi.add_edges_from(pairs, label=label)

    return StellarGraph(multi)
def create_test_graph():
    """
    Creates a simple graph for testing the BreadthFirstWalk class. The node ids are string or integers.

    :return: A StellarGraph wrapping a networkx graph with 13 nodes and 21 edges (self loops on all but two of
        the nodes).
    """
    links = [
        ("0", 1),
        ("0", 2),
        (1, 3),
        (1, 4),
        (3, 6),
        (4, 7),
        (4, 8),
        (2, 5),
        (5, 9),
        (5, 10),
    ]
    # nodes that link back to themselves; "self loner" has no other edge
    looped = ["0", 1, 3, 6, 4, 7, 8, 2, 5, 9, "self loner"]
    self_loops = [(node, node) for node in looped]

    nx_graph = nx.Graph()
    nx_graph.add_edges_from(links + self_loops)
    # node with no edges at all, not even a self loop
    nx_graph.add_node("loner")

    return StellarGraph(nx_graph)
def create_test_weighted_multigraph():
    """
    Creates a weighted multigraph for testing the weighted random biased walk method. The node ids are string or
    integers.

    :return: A StellarGraph wrapping a networkx MultiGraph built from 23 weighted edge tuples (three node pairs
        are listed twice) plus one node, "lonely", that has no edges.
    """
    links = [
        ("0", 1, 3),
        ("0", 1, 3),
        (1, 3, 1),
        (1, 4, 5),
        (2, 5, 7),
        (2, 5, 7),
        (3, 6, 9),
        (3, 6, 9),
        (4, 7, 2),
        (4, 8, 5),
        (5, 9, 5),
        (5, 10, 6),
    ]
    # (node, weight) of each self loop; "self lonely" is an isolated node with a self link
    loop_weights = [
        ("0", 7),
        (1, 8),
        (2, 4),
        (3, 8),
        (6, 9),
        (4, 1),
        (7, 2),
        (8, 3),
        (5, 5),
        (9, 6),
        ("self lonely", 0),
    ]
    self_loops = [(node, node, weight) for node, weight in loop_weights]

    multigraph = nx.MultiGraph()
    multigraph.add_weighted_edges_from(links + self_loops)
    # an isolated node without self link
    multigraph.add_node("lonely")

    return StellarGraph(multigraph)
def create_test_graph():
    """
    Creates a simple graph for testing the BreadthFirstWalk class. The node ids are string or integers.

    :return: A StellarGraph wrapping a networkx graph with 13 nodes and 21 edges (self loops on all but two of
        the nodes).
    """
    links = [
        ("0", 1),
        ("0", 2),
        (1, 3),
        (1, 4),
        (3, 6),
        (4, 7),
        (4, 8),
        (2, 5),
        (5, 9),
        (5, 10),
    ]
    # nodes with a self link; "self lonely" is isolated apart from its self link
    looped = ["0", 1, 3, 6, 4, 7, 8, 2, 5, 9, "self lonely"]
    self_loops = [(node, node) for node in looped]

    nx_graph = nx.Graph()
    nx_graph.add_edges_from(links + self_loops)
    # an isolated node without self link
    nx_graph.add_node("lonely")

    return StellarGraph(nx_graph)
def create_test_weighted_graph(is_multigraph=False):
    """
    Creates a simple weighted graph for testing the weighted biased random walk class. The node ids are string
    or integers.

    :param is_multigraph: accepted but not used by this function.
    :return: A StellarGraph with 13 nodes and 21 weighted edges, 11 of which are self loops.
    """
    node_ids = ["0", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "self loner", "loner"]

    links = [
        ("0", 1, 3),
        ("0", 2, 4),
        (1, 3, 1),
        (1, 4, 7),
        (3, 6, 9),
        (4, 7, 2),
        (4, 8, 5),
        (2, 5, 7),
        (5, 9, 5),
        (5, 10, 6),
    ]
    # (node, weight) of each self loop; "self loner" is an isolated node with a self link
    loop_weights = [
        ("0", 7),
        (1, 8),
        (3, 8),
        (6, 9),
        (4, 1),
        (7, 2),
        (8, 3),
        (2, 4),
        (5, 5),
        (9, 6),
        ("self loner", 0),
    ]
    self_loops = [(node, node, weight) for node, weight in loop_weights]

    nodes = pd.DataFrame(index=node_ids)
    edges = pd.DataFrame(links + self_loops, columns=["source", "target", "weight"])
    return StellarGraph(nodes, edges)
def test_weighted_graph_label(self):
    """Weighted biased walks on a graph converted with ``edge_weight_attr="w"``.

    One walk per node (4 in total) is expected when the weights are stored
    under the attribute named at conversion time.
    """
    cycle = [(1, 2), (2, 3), (3, 4), (4, 1)]

    def cycle_with_weights(attr):
        # networkx 4-cycle with weights 1..4 stored under ``attr``
        nx_graph = nx.Graph()
        nx_graph.add_edges_from(cycle)
        for weight, (u, v) in enumerate(cycle, start=1):
            nx_graph[u][v][attr] = weight
        return nx_graph

    graph = StellarGraph.from_networkx(cycle_with_weights("w"), edge_weight_attr="w")
    roots = list(graph.nodes())

    walker = BiasedRandomWalk(graph)
    walks = walker.run(
        nodes=roots, n=1, p=1.0, q=1.0, length=1, seed=None, weighted=True
    )
    assert len(walks) == 4

    # NOTE(review): a second graph with weights under "wt" is built here but
    # never used in the visible code -- the rest of this test may be missing.
    g = cycle_with_weights("wt")
def test_generator_parameter(self):
    """``UnsupervisedSampler.generator`` must reject unsuitable batch sizes.

    The generator is created for each batch size first; the error is only
    expected when the first batch is requested with ``next``.
    """
    g = create_test_graph()
    g = StellarGraph(g)
    # rw = UniformRandomWalk(g)
    sampler = UnsupervisedSampler(G=g)

    # generator should be provided with a valid batch size, i.e. an integer >= 1
    # NOTE(review): batch_size=3 is also expected to raise ValueError; the
    # reason is not visible here -- presumably an even size is required, confirm.
    rejected = [
        (None, ValueError),
        ("x", TypeError),
        (0, ValueError),
        (3, ValueError),
    ]
    for batch_size, expected_error in rejected:
        sample_gen = sampler.generator(batch_size=batch_size)
        with pytest.raises(expected_error):
            next(sample_gen)
def test_walk_biases(self):
    """Extreme ``p``/``q`` values must restrict biased walks to the expected paths.

    1000 unseeded walks of length 4 are sampled from node 0 for each p/q
    setting, and the set of distinct walks is compared with the expected set.
    """
    # a square with a triangle:
    #   0-3
    #  /| |
    # 1-2-4
    node_frame = pd.DataFrame(index=range(5))
    edge_frame = pd.DataFrame(
        [(0, 1), (0, 2), (0, 3), (1, 2), (2, 4), (3, 4)],
        columns=["source", "target"],
    )
    walker = BiasedRandomWalk(StellarGraph(node_frame, edge_frame))

    # there's 18 total walks of length 4 starting at 0 in the graph,
    # and the non-tiny transition probabilities are always equal
    # so with a large enough sample, all the possible paths for a
    # given p, q should come up.
    always = 1e-20
    never = 1e20

    def distinct_walks(p, q):
        # distinct walks, as tuples, seen in 1000 samples from node 0
        sampled = walker.run(nodes=[0], n=1000, p=p, q=q, length=4, seed=None)
        return {tuple(w) for w in sampled}

    # always return to the last visited node
    assert distinct_walks(p=always, q=never) == {
        (0, 1, 0, 1),
        (0, 2, 0, 2),
        (0, 3, 0, 3),
    }

    # always explore (when possible)
    assert distinct_walks(p=never, q=always) == {
        # follow the square
        (0, 2, 4, 3),
        (0, 3, 4, 2),
        # go around the triangle (2 is a neighbour of 0, so the step
        # 1 -> 2 isn't exploring, but the only alternative from 1 is
        # returning to 0 and p = never)
        (0, 1, 2, 4),
    }

    # always go to a neighbour, if possible, otherwise equal
    # chance of returning or exploring
    assert distinct_walks(p=never, q=never) == {
        # follow the triangle
        (0, 1, 2, 0),
        (0, 2, 1, 0),
        # all explorations around the square should appear (none
        # are neighbours)
        (0, 3, 0, 1),
        (0, 3, 0, 2),
        (0, 3, 0, 3),
        (0, 3, 4, 3),
        (0, 3, 4, 2),
    }