def test_fullbatch_cluster_models(model_type):
    """Fit, evaluate and predict with ``model_type`` fed by a ClusterNodeGenerator.

    ``model_type`` is called with ``generator``, ``layer_sizes`` and
    ``activations`` keyword arguments, and the object it returns must provide
    ``in_out_tensors()``.
    """
    graph = example_graph_random(n_nodes=50)
    generator = ClusterNodeGenerator(graph, clusters=10)

    # Only the first 40 nodes are flowed, each paired with a target of 1.
    nodes = graph.nodes()[:40]
    gen = generator.flow(nodes, targets=np.ones(len(nodes)))

    gnn = model_type(
        generator=generator,
        layer_sizes=[16, 16, 1],
        activations=["relu"] * 3,
    )

    model = tf.keras.Model(*gnn.in_out_tensors())
    model.compile(optimizer="adam", loss="binary_crossentropy")
    model.fit(gen, validation_data=gen, epochs=2)
    model.evaluate(gen)

    # Calling ``model.predict(gen)`` directly doesn't work for any cluster
    # model (ClusterGCN included): the model emits per-cluster predictions
    # shaped [(1, cluster_1_size, feat_size), (1, cluster_2_size, feat_size)...]
    # and the attempt to concatenate them along axis 0 fails. Squeezing out
    # that leading axis in a second model sidesteps the problem.
    x_in, x_out = gnn.in_out_tensors()
    squeezed_out = tf.squeeze(x_out, 0)
    embedding_model = tf.keras.Model(inputs=x_in, outputs=squeezed_out)
    predictions = embedding_model.predict(gen)

    assert predictions.shape == (len(nodes), 1)
def test_benchmark_ClusterGCN_generator(benchmark, q):
    """Benchmark a full pass over every batch of a ClusterNodeGenerator flow."""
    graph = example_graph_random(feature_size=10, n_nodes=1000, n_edges=5000)
    sequence = ClusterNodeGenerator(graph, clusters=10, q=q).flow(graph.nodes())

    def consume_all_batches():
        # iterate over all the batches
        return list(sequence)

    benchmark(consume_all_batches)
def test_ClusterNodeSquence():
    """Check sequence length and per-batch shapes for 1, 4 and 2 clusters."""
    G = create_stellargraph()

    def flow_with(clusters):
        # q=1 throughout; a fresh node-id list is built for every flow.
        cluster_generator = ClusterNodeGenerator(G, clusters=clusters, q=1)
        return cluster_generator.flow(node_ids=["a", "b", "c", "d"])

    def check_batches(sequence, size):
        # ClusterNodeSequence returns the following:
        # [features, target_node_indices, adj_cluster], cluster_targets
        for batch in sequence:
            assert len(batch) == 2
            inputs = batch[0]
            # The first dimension is the batch dimension necessary to make
            # this work with Keras
            assert inputs[0].shape == (1, size, 2)
            assert inputs[1].shape == (1, size)
            # `size` nodes, so the adjacency matrix is size x size
            assert inputs[2].shape == (1, size, size)
            # no targets given
            assert batch[1] is None

    assert len(flow_with(1)) == 1
    assert len(flow_with(4)) == 4

    # Four clusters: the batches checked here each hold a single node.
    check_batches(flow_with(4), size=1)

    # Use 2 clusters: the batches checked here each hold two nodes.
    two_cluster_sequence = flow_with(2)
    assert len(two_cluster_sequence) == 2
    check_batches(two_cluster_sequence, size=2)
def test_ClusterGCN_activations():
    """Check which activation lists ClusterGCN accepts and which it rejects."""
    G = create_stellargraph()
    generator = ClusterNodeGenerator(G)

    # Test activations are set correctly
    accepted = [
        ([2], ["relu"]),
        ([2, 2], ["relu", "relu"]),
        ([2], ["linear"]),
    ]
    for layer_sizes, activations in accepted:
        # Compare against a separate copy rather than the list handed over.
        expected = list(activations)
        cluster_gcn = ClusterGCN(
            layer_sizes=layer_sizes, generator=generator, activations=activations
        )
        assert cluster_gcn.activations == expected

    rejected = [
        # activations for layers must be specified
        (TypeError, {"layer_sizes": [2]}),
        # More activations than layers
        (AssertionError, {"layer_sizes": [2], "activations": ["relu", "linear"]}),
        # Fewer activations than layers
        (AssertionError, {"layer_sizes": [2, 2], "activations": ["relu"]}),
        # Unknown activation
        (ValueError, {"layer_sizes": [2], "activations": ["bleach"]}),
    ]
    for expected_error, kwargs in rejected:
        with pytest.raises(expected_error):
            ClusterGCN(generator=generator, **kwargs)
def test_ClusterGCN_apply():
    """Build a one-layer ClusterGCN Keras model and check its prediction shape."""
    graph = create_stellargraph()
    generator = ClusterNodeGenerator(graph)
    gcn = ClusterGCN(
        layer_sizes=[2], generator=generator, activations=["relu"], dropout=0.0
    )

    inputs, outputs = gcn.build()
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Check prediction through the generator-based predict method. The
    # expected shape is presumably (batch, nodes, layer size): three node ids
    # are flowed and the single layer has size 2.
    node_flow = generator.flow(["a", "b", "c"])
    predictions = model.predict_generator(node_flow)
    assert predictions.shape == (1, 3, 2)
def test_ClusterGCN_save_load(tmpdir):
    """Hand a two-layer ClusterGCN to the shared ``model_save_load`` helper."""
    graph, _ = create_graph_features()
    model = ClusterGCN(
        layer_sizes=[2, 3],
        activations=["relu", "relu"],
        generator=ClusterNodeGenerator(graph),
    )
    test_utils.model_save_load(tmpdir, model)
def _dispatch_generator(graph, model_name, params, generator_type="node"): """Create a graph generator.""" if model_name == "watchyourstep": return AdjacencyPowerGenerator( graph, num_powers=params["num_powers"]) elif model_name in ["complex", "distmult"]: return KGTripleGenerator(graph, params["batch_size"]) elif model_name == "attri2vec": if generator_type == "node": return Attri2VecNodeGenerator( graph, params["batch_size"]) else: return Attri2VecLinkGenerator( graph, params["batch_size"]) elif model_name in ["graphsage", "graphsage_dgi"]: if generator_type == "node": return GraphSAGENodeGenerator( graph, params["batch_size"], params["num_samples"]) else: return GraphSAGELinkGenerator( graph, params["batch_size"], params["num_samples"]) elif model_name in ["gcn_dgi", "gat_dgi"]: return FullBatchNodeGenerator(graph, sparse=False) elif model_name in ["cluster_gcn_dgi", "cluster_gat_dgi"]: return ClusterNodeGenerator( graph, clusters=params["clusters"], q=params["clusters_q"]) else: raise ValueError(f"Unknown model name '{model_name}'")
def test_ClusterGCN_apply():
    """Build a one-layer ClusterGCN Keras model and check its prediction shape."""
    graph, _ = create_graph_features()
    generator = ClusterNodeGenerator(graph)
    gcn = ClusterGCN(
        layer_sizes=[2], generator=generator, activations=["relu"], dropout=0.0
    )

    inputs, outputs = gcn.in_out_tensors()
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Check the predict method. The expected shape is presumably
    # (batch, nodes, layer size): three node ids are flowed and the single
    # layer has size 2.
    node_flow = generator.flow(["a", "b", "c"])
    predictions = model.predict(node_flow)
    assert predictions.shape == (1, 3, 2)
def test_cluster_weighted():
    """Check that ``weighted=True`` changes the third input of the only batch."""
    G = create_stellargraph()

    def single_cluster_flow(weighted):
        cluster_generator = ClusterNodeGenerator(
            G, clusters=1, q=1, weighted=weighted
        )
        return cluster_generator.flow(node_ids=["a", "b", "c", "d"])

    unweighted = single_cluster_flow(False)
    weighted = single_cluster_flow(True)
    assert len(unweighted) == len(weighted) == 1

    unweighted_inputs, _ = unweighted[0]
    weighted_inputs, _ = weighted[0]

    # Compare the flattened, sorted entries of input 2 (presumably the cluster
    # adjacency) so the check does not depend on the order of the entries.
    weighted_values = np.sort(weighted_inputs[2].ravel())
    unweighted_values = np.sort(unweighted_inputs[2].ravel())
    assert not np.allclose(weighted_values, unweighted_values)
def test_ClusterGCN_init():
    """Check that ClusterGCN exposes the constructor arguments it was given."""
    graph, _features = create_graph_features()
    model = ClusterGCN(
        layer_sizes=[2],
        generator=ClusterNodeGenerator(graph),
        activations=["relu"],
        dropout=0.5,
    )

    assert model.layer_sizes == [2]
    assert model.activations == ["relu"]
    assert model.dropout == 0.5
def _fit_deep_graph_infomax(train_graph, params, model_name): """Train unsupervised Deep Graph Infomax.""" if "gcn_dgi" in model_name or "gat_dgi" in model_name: if "cluster" in model_name: generator = ClusterNodeGenerator( train_graph, clusters=params["clusters"], q=params["clusters_q"]) else: generator = FullBatchNodeGenerator(train_graph, sparse=False) if "gcn_dgi" in model_name: embedding_layer = GCN( layer_sizes=[params["embedding_dimension"]], activations=["relu"], generator=generator) elif "gat_dgi" in model_name: embedding_layer = GAT( layer_sizes=[params["embedding_dimension"]], activations=["relu"], generator=generator, attn_heads=8) elif model_name == "graphsage_dgi": generator = GraphSAGENodeGenerator( train_graph, batch_size=50, num_samples=[5]) embedding_layer = GraphSAGE( layer_sizes=[params["embedding_dimension"]], activations=["relu"], generator=generator ) else: raise ValueError(f"Unknown mode name {model_name}") embedding_model = _execute_deep_graph_infomax( train_graph, embedding_layer, generator, params) # Here the models can be both inductive and transductive if model_name in ["gcn_dgi", "gat_dgi", "graphsage_dgi"]: return embedding_model.predict( generator.flow(train_graph.nodes())) else: return embedding_model
def test_kernel_and_bias_defaults():
    """Check the default initialisers, regularisers and constraints.

    Only the ``GraphConvolution`` layers of a two-layer ClusterGCN are
    inspected; every other entry of ``_layers`` is skipped.
    """
    graph, _ = create_graph_features()
    cluster_gcn = ClusterGCN(
        layer_sizes=[2, 2],
        activations=["relu", "relu"],
        generator=ClusterNodeGenerator(graph),
    )

    unset_by_default = (
        "kernel_regularizer",
        "bias_regularizer",
        "kernel_constraint",
        "bias_constraint",
    )
    for layer in cluster_gcn._layers:
        if not isinstance(layer, GraphConvolution):
            continue
        assert isinstance(layer.kernel_initializer, tf.initializers.GlorotUniform)
        assert isinstance(layer.bias_initializer, tf.initializers.Zeros)
        for attribute in unset_by_default:
            assert getattr(layer, attribute) is None
def test_ClusterGCN_init():
    """Check that ClusterGCN exposes the constructor arguments it was given.

    The graph from ``create_graph_features`` is first wrapped in a
    ``StellarGraph`` together with a node-indexed feature frame.
    """
    raw_graph, features = create_graph_features()

    # One row of features per node, indexed by node id.
    feature_frame = pd.DataFrame.from_dict(
        dict(zip(raw_graph.nodes(), features)), orient="index"
    )
    graph = StellarGraph(
        raw_graph, node_type_name="node", node_features=feature_frame
    )

    model = ClusterGCN(
        layer_sizes=[2],
        generator=ClusterNodeGenerator(graph),
        activations=["relu"],
        dropout=0.5,
    )

    assert model.layer_sizes == [2]
    assert model.activations == ["relu"]
    assert model.dropout == 0.5
def test_ClusterGCN_regularisers():
    """Check accepted and rejected regulariser/initialiser arguments."""
    G = create_stellargraph()
    generator = ClusterNodeGenerator(G)

    def build(**extra):
        # Every case shares the same layer sizes, activations and generator.
        return ClusterGCN(
            layer_sizes=[2], activations=["relu"], generator=generator, **extra
        )

    # A regulariser instance is accepted ...
    build(kernel_regularizer=keras.regularizers.l2())
    # ... but an unrecognised regulariser name is not.
    with pytest.raises(ValueError):
        build(kernel_regularizer="fred")

    # Initialisers are accepted both by name and as an instance ...
    build(bias_initializer="zeros")
    build(bias_initializer=initializers.zeros())
    # ... but an unrecognised initialiser name is not.
    with pytest.raises(ValueError):
        build(bias_initializer="barney")
def test_ClusterNodeGenerator_init():
    """Exercise ClusterNodeGenerator argument validation and the stored k/q."""
    G = create_stellargraph()

    def assert_rejected(cases):
        for expected_error, kwargs in cases:
            with pytest.raises(expected_error):
                ClusterNodeGenerator(G, **kwargs)

    assert_rejected(
        [
            (ValueError, {"clusters": 0}),
            # clusters must be integer if not list
            (TypeError, {"clusters": 0.5}),
            # q must be greater than 0
            (ValueError, {"q": 0}),
            # q must be integer
            (TypeError, {"q": 1.0}),
            # number of clusters is not exactly divisible by q
            (ValueError, {"clusters": 5, "q": 2}),
            # one cluster, k=len(clusters), not divisible by q
            (ValueError, {"clusters": [["a", "b", "c", "d"]], "q": 2}),
        ]
    )

    # this should be ok
    generator = ClusterNodeGenerator(G, clusters=[["a", "b", "c", "d"]], q=1)
    assert generator.k == 1
    assert generator.q == 1

    # two clusters, len(clusters).
    generator = ClusterNodeGenerator(G, clusters=[["a", "d"], ["b", "c"]], q=1)
    assert generator.k == 2
    assert generator.q == 1

    # lam has to be in the interval [0., 1.] and float
    assert_rejected(
        [
            (TypeError, {"clusters": 1, "q": 1, "lam": -1}),
            (TypeError, {"clusters": 1, "q": 1, "lam": 1}),
            (ValueError, {"clusters": 1, "q": 1, "lam": 2.5}),
        ]
    )
def test_ClusterNodeSequence_cluster_without_targets():
    """Iterate a two-cluster flow that was given a single target node.

    The test has no assertions: it passes as long as building and consuming
    the sequence does not raise.
    """
    graph = create_stellargraph()
    sequence = ClusterNodeGenerator(graph, clusters=2, q=1).flow(
        node_ids=["a"], targets=[0]
    )
    # Materialise every batch; the result itself is not inspected.
    list(sequence)