def test_distinct_random_walk():
    """Check `batching.distinct_random_walk` on balanced and biased graphs."""
    # Balanced walk (p = q = 1) on a 4-node path graph: 0 - 1 - 2 - 3.
    path_adj = sparse.csr_matrix(np.array([
        [0, 1, 0, 0],
        [1, 0, 1, 0],
        [0, 1, 0, 1],
        [0, 0, 1, 0],
    ]))
    path_graph = graph.CSGraph(path_adj, 1, 1)

    # A random walk that is long enough covers the whole connected component...
    assert batching.distinct_random_walk(path_graph, 0, 4) == {0, 1, 2, 3}
    # ...whereas a shorter one only collects a subset of it.
    assert batching.distinct_random_walk(path_graph, 0, 3) == {0, 1, 2}

    # Adjacency matrix shared by the two biased-walk scenarios below.
    biased_adj = sparse.csr_matrix(np.array([
        [0, 1, 1, 0, 0, 0],
        [1, 0, 1, 1, 0, 0],
        [1, 1, 0, 1, 1, 0],
        [0, 1, 1, 0, 1, 1],
        [0, 0, 1, 1, 0, 1],
        [0, 0, 0, 1, 1, 0],
    ]))

    # Favour exclusively non-triad outer links. The walk is random, so we
    # sample it 100 times and compare the set of distinct outcomes.
    outward_graph = graph.CSGraph(biased_adj, 1, 1e-9)
    outward_walks = {
        tuple(sorted(batching.distinct_random_walk(outward_graph, 0, 4)))
        for _ in range(100)
    }
    assert outward_walks == {
        (0, 1, 3, 4),
        (0, 1, 3, 5),
        (0, 2, 3, 5),
        (0, 2, 4, 5),
    }

    # Favour exclusively triad outer links; again sampled 100 times.
    triad_graph = graph.CSGraph(biased_adj, 1e9, 1e9)
    triad_walks = {
        tuple(sorted(batching.distinct_random_walk(triad_graph, 0, 4)))
        for _ in range(100)
    }
    assert triad_walks == {(0, 1, 2, 3)}
def test_CSGraph_init():
    """Check `graph.CSGraph` construction: stored state and input validation."""
    # Valid input. The first stored entry is overwritten with 0 so that the
    # sparse matrix carries an explicit zero when handed to the constructor.
    matrix = sparse.csr_matrix(np.array([[1, 1], [1, 0]]))
    matrix.data[0] = 0
    built = graph.CSGraph(matrix, .5, 4)

    # The p and q parameters are stored as given.
    assert built._p == .5
    assert built._q == 4

    # Explicit zeros are removed from the adjacency matrix.
    assert_array_equal(built.adj.data, [1, 1])
    assert_array_equal(built.adj.toarray(), [[0, 1], [1, 0]])

    # Edge aliases exist for both directions of the single edge.
    assert set(built._edge_aliases.keys()) == {(0, 1), (1, 0)}
    assert_alias_equal(built._edge_aliases[(0, 1)], ([0], [1]))

    # A dense (non-sparse) matrix is rejected with a ValueError.
    with pytest.raises(ValueError):
        graph.CSGraph(np.array([[0, 1], [0, 0]]), .5, 4)

    # Sparse but invalid matrices are rejected with an AssertionError.
    invalid_matrices = (
        [[0, 1], [0, 0]],  # directed graph
        [[0, 1], [2, 0]],  # weighted graph
        [[1, 1], [1, 0]],  # diagonal element
    )
    for rows in invalid_matrices:
        with pytest.raises(AssertionError):
            graph.CSGraph(sparse.csr_matrix(np.array(rows)), .5, 4)
def test_CSGraph_eq():
    """Check equality and inequality between `graph.CSGraph` instances."""
    path_rows = [[0, 1, 0], [1, 0, 1], [0, 1, 0]]
    triangle_rows = [[0, 1, 1], [1, 0, 1], [1, 1, 0]]

    def build(rows, p, q):
        # Every call creates a fresh sparse matrix and a fresh graph.
        return graph.CSGraph(sparse.csr_matrix(np.array(rows)), p, q)

    # Same matrix, same p, same q: equal.
    assert build(path_rows, 2, 4) == build(path_rows, 2, 4)

    # Different matrices: not equal.
    assert build(triangle_rows, 2, 4) != build(path_rows, 2, 4)

    # Different p: not equal.
    assert build(path_rows, 2, 4) != build(path_rows, 3, 4)

    # Different q: not equal.
    assert build(path_rows, 2, 4) != build(path_rows, 2, 3)
def test_connected_component_or_none():
    """Check `batching.connected_component_or_none` with and without maxing out."""
    # Three components: {0, 1, 2}, the isolated node {3}, and {4, 5}.
    adjacency = sparse.csr_matrix(np.array([
        [0, 1, 1, 0, 0, 0],
        [1, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1, 0],
    ]))
    csgraph = graph.CSGraph(adjacency, 1, 1)

    # Not maxing out: (source, limit, expected component).
    fitting_cases = (
        (0, 3, {0, 1, 2}),
        (1, 4, {0, 1, 2}),
        (2, 5, {0, 1, 2}),
        (3, 5, {3}),
        (4, 2, {4, 5}),
        (5, 3, {4, 5}),
    )
    for source, limit, expected in fitting_cases:
        found = batching.connected_component_or_none(csgraph, source, limit)
        assert found == expected

    # Maxing out: (source, limit) pairs for which None is returned.
    maxed_cases = ((0, 2), (1, 2), (2, 2), (3, 0), (4, 1), (5, 1))
    for source, limit in maxed_cases:
        found = batching.connected_component_or_none(csgraph, source, limit)
        assert found is None
def test_batch_walks():
    """Check how many walks `batching.batch_walks` yields for a small graph."""
    # Path graph 0 - 1 - 2 - 3 plus the isolated node 4.
    adjacency = sparse.csr_matrix(np.array([
        [0, 1, 0, 0, 0],
        [1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0],
    ]))
    csgraph = graph.CSGraph(adjacency, 1, 1)

    # The right number of walks is yielded to span the whole graph.
    yielded = list(batching.batch_walks(csgraph, 2, 2))
    assert len(yielded) == 3
def test__collect_maxed_connected_component():
    """Check `batching._collect_maxed_connected_component` results and errors."""
    # Three components: {0, 1, 2}, the isolated node {3}, and {4, 5}.
    adjacency = sparse.csr_matrix(np.array([
        [0, 1, 1, 0, 0, 0],
        [1, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1, 0],
    ]))
    csgraph = graph.CSGraph(adjacency, 1, 1)

    # Not maxing out: the call returns a falsy value and fills the set that
    # was passed in. Rows are (source, limit, expected collected nodes).
    fitting_cases = (
        (0, 3, {0, 1, 2}),
        (1, 4, {0, 1, 2}),
        (2, 5, {0, 1, 2}),
        (3, 5, {3}),
        (4, 2, {4, 5}),
        (5, 3, {4, 5}),
    )
    for source, limit, expected in fitting_cases:
        gathered = set()
        assert not batching._collect_maxed_connected_component(
            csgraph, source, limit, gathered)
        assert gathered == expected

    # Maxing out: the call returns a truthy value.
    maxed_cases = ((0, 2), (1, 2), (2, 2), (3, 0), (4, 1), (5, 1))
    for source, limit in maxed_cases:
        assert batching._collect_maxed_connected_component(
            csgraph, source, limit, set())

    # An error is raised if the source node has already been collected.
    with pytest.raises(AssertionError):
        batching._collect_maxed_connected_component(csgraph, 2, None, {2})
def test_epoch_batches(model_depth2):
    """Check `batching.epoch_batches` both without and with sampling."""
    # Path graph 0 - 1 - 2 - 3 plus the isolated node 4.
    adjacency = sparse.csr_matrix(np.array([
        [0, 1, 0, 0, 0],
        [1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0],
    ]))
    csgraph = graph.CSGraph(adjacency, 1, 1)

    # Feed keys expected in a batch: four entries for each of the three layers.
    expected_feed_keys = {
        layer + '_' + suffix
        for layer in ('layer1a', 'layer1b', 'layer2')
        for suffix in ('adj/indices', 'adj/values', 'adj/dense_shape',
                       'output_mask')
    }

    # Same checks twice: first with no sampling (None), then with sampling (1).
    for sampling in (None, 1):
        epoch = list(
            batching.epoch_batches(model_depth2, csgraph, 2, 2, sampling))
        assert len(epoch) == 3

        _, final_nodes, feeds = epoch[0]
        assert_array_equal(final_nodes, sorted(final_nodes))
        assert set(feeds.keys()) == expected_feed_keys

        # len == 3 if node 4 was in the seeds for batch 0.
        # If node 4 is not in the seeds, it can be 2, 3, or 4 depending on
        # the overlap of the walks.
        assert len(final_nodes) in [2, 3, 4]

        # Make sure we span all possible lengths when iterating enough:
        # take the first batch of 100 fresh epochs.
        seen_lengths = set()
        for _ in range(100):
            first_batch = next(
                batching.epoch_batches(model_depth2, csgraph, 2, 2, sampling))
            _, final_nodes, _ = first_batch
            seen_lengths.add(len(final_nodes))
        assert seen_lengths == {2, 3, 4}
def g():
    """Return a 6-node `graph.CSGraph` built with p = .5 and q = 4."""
    dense_rows = np.array([
        [0, 1, 1, 1, 0, 0],
        [1, 0, 0, 1, 0, 1],
        [1, 0, 0, 1, 0, 1],
        [1, 1, 1, 0, 1, 0],
        [0, 0, 0, 1, 0, 1],
        [0, 1, 1, 0, 1, 0],
    ])
    sparse_adj = sparse.csr_matrix(dense_rows)
    return graph.CSGraph(sparse_adj, .5, 4)