def test_walk_generation_single_root_node_self_loner(self):
    """Walks rooted at "self loner" have the expected size and contain only that node.

    Every combination of ``n`` (walks per root) and ``n_size`` (samples per
    hop) is checked for the number of walks returned, and every returned
    walk is checked rather than only the first one.
    """
    g = create_test_graph()
    bfw = SampledBreadthFirstWalk(g)

    nodes = ["self loner"]

    for n in (1, 3):
        for n_size in ([0], [1], [2, 2], [3, 2]):
            subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
            assert len(subgraphs) == n * len(nodes)
            for subgraph in subgraphs:
                assert len(subgraph) == expected_bfw_size(n_size=n_size)
                # all elements should be the same node, namely the root
                assert len(set(subgraph)) == 1
                assert nodes[0] in set(subgraph)
def test_benchmark_bfs_walk(self, benchmark):
    """Benchmark five two-hop (5 x 5) walks from node "0" on the test graph."""
    walker = SampledBreadthFirstWalk(create_test_graph())
    roots = ["0"]
    walks_per_root = 5
    samples_per_hop = [5, 5]

    def sample_walks():
        return walker.run(nodes=roots, n=walks_per_root, n_size=samples_per_hop)

    benchmark(sample_walks)
def test_benchmark_bfs_walk(self, benchmark):
    """Benchmark five two-hop (5 x 5) walks from 50 root nodes of a random graph."""
    random_graph = example_graph_random(n_nodes=100, n_edges=500)
    walker = SampledBreadthFirstWalk(random_graph)
    roots = np.arange(0, 50)
    walks_per_root = 5
    samples_per_hop = [5, 5]

    def sample_walks():
        return walker.run(nodes=roots, n=walks_per_root, n_size=samples_per_hop)

    benchmark(sample_walks)
def test_fixed_random_seed(self):
    """Different seeds give different walks; repeating a seed reproduces the walks."""
    walker = SampledBreadthFirstWalk(create_test_graph())

    def draw(roots, walks_per_root, sizes, seed):
        # Fresh lists on every call, mirroring independent literal arguments.
        return walker.run(
            nodes=list(roots), n=walks_per_root, n_size=list(sizes), seed=seed
        )

    # Two different seeds: same number of walks, different content.
    first = draw([1], 1, [7], 42)
    second = draw([1], 1, [7], 1010)
    assert len(first) == len(second)
    assert first != second

    # The same seed used twice: identical output.
    repeatable_cases = (
        ([1], 1, [7], 42),
        ([1], 5, [12], 101),
        ([9, "self loner"], 1, [12], 101),
        ([1, "self loner", 4], 5, [12], 101),
    )
    for roots, walks_per_root, sizes, seed in repeatable_cases:
        first = draw(roots, walks_per_root, sizes, seed)
        second = draw(roots, walks_per_root, sizes, seed)
        assert len(first) == len(second)
        assert first == second
def test_walk_generation_single_root_node(self):
    """A single walk from root "0" has the expected size for various ``n_size``.

    The number of walks returned is ``len(nodes) * n``; the length of each
    individual walk is ``expected_bfw_size(n_size)``. These are asserted
    separately so the test stays correct if ``nodes`` or ``n`` are changed.
    """
    g = create_test_graph()
    bfw = SampledBreadthFirstWalk(g)

    nodes = ["0"]
    n = 1

    n_sizes = ([0], [2], [3], [1, 1], [2, 2], [2, 2, 2], [2, 3], [2, 3, 2])
    for n_size in n_sizes:
        subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
        assert len(subgraphs) == len(nodes) * n
        assert len(subgraphs[0]) == expected_bfw_size(n_size=n_size)
def __init__(self, G, batch_size, num_samples, seed=None, name=None):
    """Validate the graph, store the settings and build the neighbour sampler.

    Args:
        G: Graph to sample from; must be a ``StellarGraphBase`` instance.
        batch_size: Stored unchanged as ``self.batch_size``.
        num_samples: Stored unchanged as ``self.num_samples``.
        seed: Passed through to ``SampledBreadthFirstWalk``.
        name: Stored unchanged as ``self.name``.

    Raises:
        TypeError: If ``G`` is not a ``StellarGraphBase`` instance.
    """
    graph_is_valid = isinstance(G, StellarGraphBase)
    if not graph_is_valid:
        raise TypeError("Graph must be a StellarGraph object.")

    G.check_graph_for_ml(features=True)

    self.graph, self.num_samples, self.batch_size, self.name = (
        G,
        num_samples,
        batch_size,
        name,
    )

    # A schema is kept for compatibility with HinSAGE
    schema = G.create_graph_schema(create_type_maps=True)
    self.schema = schema

    # Sampler that produces the random neighbour samples
    self.sampler = SampledBreadthFirstWalk(G, graph_schema=schema, seed=seed)
def test_weighted_all_zero(self):
    """With all edge weights zero, weighted walks hold the root followed by -1 only."""
    zero_weight_edges = pd.DataFrame(
        {"source": [0, 0], "target": [1, 2], "weight": [0.0, 0]}
    )
    graph = StellarGraph(edges=zero_weight_edges)
    walker = SampledBreadthFirstWalk(graph)

    walks = walker.run(nodes=[0], n=10, n_size=[20, 20], weighted=True)

    # root + first-hop samples + second-hop samples
    expected_length = 1 + 20 + 20 * 20

    assert len(walks) == 10
    for walk in walks:
        assert len(walk) == expected_length
        assert walk[0] == 0
        np.testing.assert_array_equal(walk[1:], -1)
def test_walk_generation_single_root_node_loner(self):
    """Walks rooted at "loner" start with the root and have the expected size.

    For the single-walk cases the sampled positions that are inspected are
    required to be ``None``.
    """
    root = "loner"
    walker = SampledBreadthFirstWalk(create_test_graph())

    def sample(count, sizes):
        # Run the walker and verify one walk per requested repetition.
        walks = walker.run(nodes=[root], n=count, n_size=sizes)
        assert len(walks) == count
        return walks

    # ---- a single walk per root ----
    walk = sample(1, [0])[0]
    assert len(walk) == 1  # root node only
    assert walk[0] == root

    sizes = [1]
    walk = sample(1, sizes)[0]
    assert len(walk) == expected_bfw_size(sizes)  # "loner" plus None
    assert walk[0] == root
    assert walk[1] is None

    sizes = [2, 2]
    walk = sample(1, sizes)[0]
    # "loner" plus 2 * None + 2 * 2 * None
    assert len(walk) == expected_bfw_size(sizes)
    assert walk[0] == root
    for position in (1, 2, 6):
        assert walk[position] is None

    sizes = [3, 2]
    walk = sample(1, sizes)[0]
    # "loner" plus 3 * None + 3 * 2 * None
    assert len(walk) == expected_bfw_size(sizes)
    assert walk[0] == root

    # ---- several walks per root ----
    for walk in sample(3, [0]):
        assert len(walk) == 1  # root node only
        assert walk[0] == root

    for count, sizes in ((3, [1]), (99, [2, 2]), (17, [3, 2])):
        for walk in sample(count, sizes):
            assert len(walk) == expected_bfw_size(sizes)
            assert walk[0] == root
def test_walk_generation_number_of_walks_per_root_nodes(self):
    """``run`` returns ``n`` walks per root node, each of the expected size."""
    walker = SampledBreadthFirstWalk(create_test_graph())

    # First case additionally checks that every walk starts at the root.
    roots = [1]
    walks_per_root = 2
    sizes = [0]
    walks = walker.run(nodes=roots, n=walks_per_root, n_size=sizes)
    assert len(walks) == len(roots) * walks_per_root
    for walk in walks:
        assert len(walk) == expected_bfw_size(n_size=sizes)
        assert walk[0] == roots[0]  # should equal the root node

    # (root nodes, walks per root, n_size values to try)
    scenarios = (
        ([1], 2, ([1], [2], [3])),
        ([1, 5], 2, ([1], [2], [3])),
        ([1, 5], 3, ([2, 2], [3, 3], [4, 4])),
    )
    for roots, walks_per_root, size_options in scenarios:
        for sizes in size_options:
            walks = walker.run(nodes=roots, n=walks_per_root, n_size=sizes)
            assert len(walks) == walks_per_root * len(roots)
            for walk in walks:
                assert len(walk) == expected_bfw_size(n_size=sizes)
def test_parameter_checking(self):
    """``run`` rejects invalid ``nodes``, ``n``, ``n_size`` and ``seed`` values.

    Each invalid call varies exactly one argument from an otherwise valid
    call, so the ``ValueError`` can only come from the argument under test.
    """
    g = create_test_graph()
    bfw = SampledBreadthFirstWalk(g)

    nodes = ["0", 1]
    n = 1
    n_size = [1]

    invalid_calls = [
        # nodes should be a list of node ids even for a single node
        dict(nodes=None, n=n, n_size=n_size),
        dict(nodes=0, n=n, n_size=n_size),
        # n has to be positive integer
        dict(nodes=nodes, n=-1, n_size=n_size),
        dict(nodes=nodes, n=10.1, n_size=n_size),
        dict(nodes=nodes, n=0, n_size=n_size),
        # n_size has to be list of positive integers
        dict(nodes=nodes, n=n, n_size=0),
        dict(nodes=nodes, n=n, n_size=[-5]),
        # n is kept valid here so that only the float in n_size can trigger the error
        dict(nodes=nodes, n=n, n_size=[2.4]),
        dict(nodes=nodes, n=n, n_size=(1, 2)),
        # seed must be positive integer or 0
        dict(nodes=nodes, n=n, n_size=n_size, seed=-1235),
        dict(nodes=nodes, n=n, n_size=n_size, seed=10.987665),
        dict(nodes=nodes, n=n, n_size=n_size, seed=-982.4746),
        dict(nodes=nodes, n=n, n_size=n_size, seed="don't be random"),
        # An empty n_size could arguably return just the start node,
        # however, by consensus this is an error.
        dict(nodes=["0"], n=1, n_size=[]),
    ]
    for kwargs in invalid_calls:
        with pytest.raises(ValueError):
            bfw.run(**kwargs)

    # If no root nodes are given, an empty list is returned, which is not an error.
    nodes = []
    subgraph = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraph) == 0
def test_walk_generation_many_root_nodes(self):
    """With two root nodes, one walk of the expected size is returned per root."""
    walker = SampledBreadthFirstWalk(create_test_graph())

    roots = ["0", 2]
    walks_per_root = 1

    # With zero samples every walk is just its own root, in input order.
    walks = walker.run(nodes=roots, n=walks_per_root, n_size=[0])
    assert len(walks) == len(roots) * walks_per_root
    for position, walk in enumerate(walks):
        assert len(walk) == 1
        assert walk[0] == roots[position]  # should equal the root node

    size_options = ([1], [2], [3], [1, 1], [2, 2], [3, 3], [2, 3], [2, 3, 2])
    for sizes in size_options:
        walks = walker.run(nodes=roots, n=walks_per_root, n_size=sizes)
        assert len(walks) == len(roots) * walks_per_root
        for walk in walks:
            assert len(walk) == expected_bfw_size(n_size=sizes)
def test_directed_walk_generation_single_root_node(self):
    """Walks from "root" on a small directed tree have the expected size and structure.

    For two-hop walks, every sampled grandchild must be one of the nodes
    permitted for its parent (see ``allowed_grandchildren``).
    """
    g = nx.DiGraph()
    edges = [
        ("root", 2),
        ("root", 1),
        ("root", "0"),
        (2, "c2.1"),
        (2, "c2.2"),
        (1, "c1.1"),
    ]
    g.add_edges_from(edges)
    g = StellarDiGraph(g)

    # Permitted second-hop samples for each possible first-hop node.
    # Note that node "0" is a string id, unlike the integer ids 1 and 2.
    allowed_grandchildren = {
        "root": ["0", 1, 2],  # node with three children
        "0": ["root"],  # node without children
        1: ["c1.1", "root"],  # node with single child
        2: ["c2.1", "c2.2", "root"],  # node with two children
    }

    def _check_directed_walk(walk, n_size):
        # Only two-hop walks with non-zero sample sizes have grandchildren to check.
        if not (len(n_size) > 1 and n_size[0] > 0 and n_size[1] > 0):
            return
        for child_pos in range(n_size[0]):
            child = walk[child_pos + 1]
            grandchildren_start = 1 + n_size[0] + child_pos * n_size[1]
            grandchildren_end = grandchildren_start + n_size[1]
            grandchildren = walk[grandchildren_start:grandchildren_end]
            if child not in allowed_grandchildren:
                raise AssertionError(
                    "unexpected child node in walk: {!r}".format(child)
                )
            for grandchild in grandchildren:
                assert grandchild in allowed_grandchildren[child]

    bfw = SampledBreadthFirstWalk(g)

    nodes = ["root"]

    # ---- a single walk ----
    n = 1
    n_size = [0]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    assert len(subgraphs[0]) == 1  # root node only
    assert subgraphs[0][0] == "root"

    n_size = [1]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    assert len(subgraphs[0]) == expected_bfw_size(n_size)  # "root" plus child
    assert subgraphs[0][0] == "root"

    n_size = [2, 2]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    # "root" plus 2 * child + 2 * 2 * grandchild or None
    assert len(subgraphs[0]) == expected_bfw_size(n_size)
    assert subgraphs[0][0] == "root"
    assert subgraphs[0][1] is not None
    assert subgraphs[0][2] is not None
    _check_directed_walk(subgraphs[0], n_size)

    n_size = [3, 2]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    # "root" plus 3 * child + 3 * 2 * grandchild or None
    assert len(subgraphs[0]) == expected_bfw_size(n_size)
    assert subgraphs[0][0] == "root"
    _check_directed_walk(subgraphs[0], n_size)

    # ---- several walks ----
    n = 3
    n_size = [0]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    for subgraph in subgraphs:
        assert len(subgraph) == 1  # root node only
        assert subgraph[0] == "root"

    n_size = [1]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    for subgraph in subgraphs:
        # "root" plus child
        assert len(subgraph) == expected_bfw_size(n_size)
        assert subgraph[0] == "root"

    # Two-hop cases: the structural check applies to every walk.
    for n, n_size in ((99, [2, 2]), (17, [3, 2])):
        subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
        assert len(subgraphs) == n
        for subgraph in subgraphs:
            # "root" plus children plus grandchildren (or None)
            assert len(subgraph) == expected_bfw_size(n_size)
            assert subgraph[0] == "root"
            _check_directed_walk(subgraph, n_size)
class GraphSAGELinkGenerator:
    """
    A data generator for link prediction with Homogeneous GraphSAGE models

    At minimum, supply the StellarGraph, the batch size, and the number of
    node samples for each layer of the GraphSAGE model.

    The supplied graph should be a StellarGraph object that is ready for
    machine learning. Currently the model requires node features for all
    nodes in the graph.

    Use the :meth:`.flow` method supplying the nodes and (optionally) targets,
    or an UnsupervisedSampler instance that generates node samples on demand,
    to get an object that can be used as a Keras data generator.

    Example::

        G_generator = GraphSAGELinkGenerator(G, 50, [10,10])
        train_data_gen = G_generator.flow(edge_ids)

    Args:
        G (StellarGraph): A machine-learning ready graph.
        batch_size (int): Size of batch of links to return.
        num_samples (list): List of number of neighbour node samples per GraphSAGE layer (hop) to take.
        seed (int or str), optional: Random seed for the sampling methods.
        name, optional: Name of generator
    """

    def __init__(self, G, batch_size, num_samples, seed=None, name=None):
        if not isinstance(G, StellarGraphBase):
            raise TypeError("Graph must be a StellarGraph object.")

        G.check_graph_for_ml(features=True)

        self.graph = G
        self.num_samples = num_samples
        self.batch_size = batch_size
        self.name = name

        # We need a schema for compatibility with HinSAGE
        self.schema = G.create_graph_schema(create_type_maps=True)

        # The sampler used to generate random samples of neighbours
        self.sampler = SampledBreadthFirstWalk(G, graph_schema=self.schema, seed=seed)

    def sample_features(self, head_links, sampling_schema):
        """
        Sample neighbours recursively from the head nodes, collect the features of the
        sampled nodes, and return these as a list of feature arrays for the GraphSAGE
        algorithm.

        Args:
            head_links: An iterable of edges to perform sampling for. It must support
                ``len`` and each edge must be a pair of node IDs.
            sampling_schema: The sampling schema for the model; the node type is read
                from ``sampling_schema[0][0][0]``.

        Returns:
            A list of ``2 * (len(num_samples) + 1)`` feature arrays: for the head nodes
            and then for each hop in turn, the array for the first node of each link
            followed by the array for the second node. Each array has shape
            ``(len(head_links), num_sampled_at_layer, feature_size)``
            where num_sampled_at_layer is the cumulative product of `num_samples`
            for that layer (1 for the head nodes themselves).
        """
        node_type = sampling_schema[0][0][0]
        head_size = len(head_links)

        # The number of samples for each head node (not including itself)
        num_full_samples = np.sum(np.cumprod(self.num_samples))

        # Reshape node samples to sensible format: one flat list of nodes per hop,
        # concatenated over all walks.
        def get_levels(loc, lsize, samples_per_hop, walks):
            end_loc = loc + lsize
            walks_at_level = list(
                it.chain.from_iterable(w[loc:end_loc] for w in walks)
            )
            if len(samples_per_hop) < 1:
                return [walks_at_level]
            return [walks_at_level] + get_levels(
                end_loc, lsize * samples_per_hop[0], samples_per_hop[1:], walks
            )

        # Get sampled nodes for the subgraphs for the edges where each edge is a tuple
        # of 2 nodes, so we are extracting 2 head nodes per edge
        batch_feats = []
        for hns in zip(*head_links):
            node_samples = self.sampler.run(nodes=hns, n=1, n_size=self.num_samples)

            # Isolated nodes will return only themselves in the sample list
            # let's correct for this by padding with None (the dummy node ID)
            node_samples = [
                ns + [None] * num_full_samples if len(ns) == 1 else ns
                for ns in node_samples
            ]

            nodes_per_hop = get_levels(0, 1, self.num_samples, node_samples)

            # Get features for the sampled nodes
            batch_feats.append(
                [
                    self.graph.get_feature_for_nodes(layer_nodes, node_type)
                    for layer_nodes in nodes_per_hop
                ]
            )

        # Resize features to (batch_size, n_neighbours, feature_size)
        # and re-pack features into a list where source, target feats alternate
        # This matches the GraphSAGE link model with (node_src, node_dst) input sockets:
        batch_feats = [
            np.reshape(feats, (head_size, -1, feats.shape[1]))
            for ab in zip(*batch_feats)
            for feats in ab
        ]
        return batch_feats

    def flow(self, link_ids, targets=None, shuffle=False):
        """
        Creates a generator/sequence object for training or evaluation
        with the supplied edge IDs and numeric targets.

        The edge IDs are the edges to train or inference on. They are
        expected to be tuples of (source_id, destination_id).

        The targets are an array of numeric targets corresponding to the
        supplied link_ids to be used by the downstream task. They should
        be given in the same order as the list of link IDs.
        If they are not specified (for example, for use in prediction),
        the targets will not be available to the downstream task.

        Note that the shuffle argument should be True for training and
        False for prediction.

        Args:
            link_ids (list or UnsupervisedSampler): an iterable of (src_id, dst_id) tuples
                specifying the edges or an UnsupervisedSampler object that has a generator
                method to generate samples on the fly.
            targets (optional, array): a 2D array of numeric targets with shape
                `(len(link_ids), target_size)`
            shuffle (optional, bool): If True the node_ids will be shuffled at each
                epoch, if False the node_ids will be processed in order.

        Returns:
            A LinkSequence or OnDemandLinkSequence object to use with the GraphSAGE model
            methods :meth:`fit_generator`, :meth:`evaluate_generator`, and :meth:`predict_generator`

        Raises:
            TypeError: If ``link_ids`` is neither an UnsupervisedSampler nor an iterable.
        """
        # The ``collections.Iterable`` alias was removed in Python 3.10; the ABC
        # lives in ``collections.abc``. Imported locally to keep this class self-contained.
        from collections.abc import Iterable

        # Pass sampler to on-demand link sequence generation
        if isinstance(link_ids, UnsupervisedSampler):
            return OnDemandLinkSequence(self, link_ids)

        # Otherwise pass iterable (check?) to standard LinkSequence
        if isinstance(link_ids, Iterable):
            return LinkSequence(self, link_ids, targets, shuffle)

        raise TypeError(
            "Argument to .flow not recognised. "
            "Please pass a list of samples or a UnsupervisedSampler object."
        )
def test_directed_walk_generation_single_root_node(self, tree_graph):
    """Walks from "root" on ``tree_graph`` have the expected size and structure.

    The walker is given and returns node ilocs; walks are converted back to
    node ids before the parent/grandchild structure is checked.
    """

    def _check_directed_walk(walk, n_size):
        # Only two-hop walks with non-zero sample sizes have grandchildren to check.
        if not (len(n_size) > 1 and n_size[0] > 0 and n_size[1] > 0):
            return
        for child_pos in range(n_size[0]):
            child = walk[child_pos + 1]
            grandchildren_start = 1 + n_size[0] + child_pos * n_size[1]
            grandchildren_end = grandchildren_start + n_size[1]
            grandchildren = walk[grandchildren_start:grandchildren_end]
            if child == "root":  # node with three children
                # node "0" is a string id, unlike the integer ids 1 and 2
                allowed = ["0", 1, 2]
            elif child == "0":  # node without children
                allowed = ["root"]
            elif child == 1:  # node with single child
                allowed = ["c1.1", "root"]
            elif child == 2:  # node with two children
                allowed = ["c2.1", "c2.2", "root"]
            else:
                raise AssertionError(
                    "unexpected child node in walk: {!r}".format(child)
                )
            for grandchild in grandchildren:
                assert grandchild in allowed

    bfw = SampledBreadthFirstWalk(tree_graph)

    nodes = tree_graph.node_ids_to_ilocs(["root"])
    root_iloc = tree_graph.node_ids_to_ilocs(["root"])[0]

    # ---- a single walk ----
    n = 1
    n_size = [0]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    assert len(subgraphs[0]) == 1  # root node only
    assert subgraphs[0][0] == root_iloc

    n_size = [1]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    assert len(subgraphs[0]) == expected_bfw_size(n_size)  # "root" plus child
    assert subgraphs[0][0] == root_iloc

    n_size = [2, 2]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    # "root" plus 2 * child + 2 * 2 * grandchild or None
    assert len(subgraphs[0]) == expected_bfw_size(n_size)
    assert subgraphs[0][0] == root_iloc
    assert subgraphs[0][1] != -1
    assert subgraphs[0][2] != -1
    _check_directed_walk(tree_graph.node_ilocs_to_ids(subgraphs[0]), n_size)

    n_size = [3, 2]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    # "root" plus 3 * child + 3 * 2 * grandchild or None
    assert len(subgraphs[0]) == expected_bfw_size(n_size)
    assert subgraphs[0][0] == root_iloc
    _check_directed_walk(tree_graph.node_ilocs_to_ids(subgraphs[0]), n_size)

    # ---- several walks ----
    n = 3
    n_size = [0]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    for subgraph in subgraphs:
        assert len(subgraph) == 1  # root node only
        assert subgraph[0] == root_iloc

    n_size = [1]
    subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
    assert len(subgraphs) == n
    for subgraph in subgraphs:
        # "root" plus child
        assert len(subgraph) == expected_bfw_size(n_size)
        assert subgraph[0] == root_iloc

    # Two-hop cases: the structural check applies to every walk.
    for n, n_size in ((99, [2, 2]), (17, [3, 2])):
        subgraphs = bfw.run(nodes=nodes, n=n, n_size=n_size)
        assert len(subgraphs) == n
        for subgraph in subgraphs:
            # "root" plus children plus grandchildren (or None)
            assert len(subgraph) == expected_bfw_size(n_size)
            assert subgraph[0] == root_iloc
            _check_directed_walk(tree_graph.node_ilocs_to_ids(subgraph), n_size)
def test_weighted(self):
    """Feed every node sampled by weighted walks from node 0 to the tree's checker."""
    graph, check_samples = weighted_tree()
    walker = SampledBreadthFirstWalk(graph)

    walks = walker.run(nodes=[0], n=10, n_size=[20, 20], weighted=True)

    def sampled_nodes():
        # Lazily flatten the walks into a single stream of node ids.
        for walk in walks:
            for node_id in walk:
                yield node_id

    check_samples(sampled_nodes())