예제 #1
0
def test_distinct_random_walk():
    """Check `batching.distinct_random_walk` on a balanced and two biased graphs."""
    # Balanced walk (p = q = 1) on the path graph 0 - 1 - 2 - 3
    path_adj = sparse.csr_matrix(np.array([[0, 1, 0, 0],
                                           [1, 0, 1, 0],
                                           [0, 1, 0, 1],
                                           [0, 0, 1, 0]]))
    balanced = graph.CSGraph(path_adj, 1, 1)

    # A long enough random walk collects the entire connected component
    full_walk = batching.distinct_random_walk(balanced, 0, 4)
    assert full_walk == {0, 1, 2, 3}
    # A shorter one only collects a subset of the connected component
    short_walk = batching.distinct_random_walk(balanced, 0, 3)
    assert short_walk == {0, 1, 2}

    def sample_walks(biased):
        # Draw 100 walks from node 0 and keep the distinct (sorted) node tuples
        return {tuple(sorted(batching.distinct_random_walk(biased, 0, 4)))
                for _ in range(100)}

    # Test data for biased walks
    strip_adj = sparse.csr_matrix(np.array([[0, 1, 1, 0, 0, 0],
                                            [1, 0, 1, 1, 0, 0],
                                            [1, 1, 0, 1, 1, 0],
                                            [0, 1, 1, 0, 1, 1],
                                            [0, 0, 1, 1, 0, 1],
                                            [0, 0, 0, 1, 1, 0]]))

    # Favour exclusively non-triad outer links
    outward = graph.CSGraph(strip_adj, 1, 1e-9)
    assert sample_walks(outward) == {(0, 1, 3, 4), (0, 1, 3, 5),
                                     (0, 2, 3, 5), (0, 2, 4, 5)}

    # Favour exclusively triad outer links
    inward = graph.CSGraph(strip_adj, 1e9, 1e9)
    assert sample_walks(inward) == {(0, 1, 2, 3)}
예제 #2
0
def test_CSGraph_init():
    """Check `graph.CSGraph` construction: stored state and input validation."""
    # Valid input: the first stored value (the (0, 0) diagonal entry) is
    # overwritten with an explicit zero before the graph is built
    valid_adj = sparse.csr_matrix(np.array([[1, 1], [1, 0]]))
    valid_adj.data[0] = 0
    built = graph.CSGraph(valid_adj, .5, 4)

    # The second and third arguments are stored as _p and _q
    assert built._p == .5
    assert built._q == 4

    # Explicit zeros are eliminated from the adjacency matrix
    assert_array_equal(built.adj.data, [1, 1])
    assert_array_equal(built.adj.toarray(), [[0, 1], [1, 0]])

    # Edge aliases are computed for both directions of the single edge
    aliases = built._edge_aliases
    assert set(aliases.keys()) == {(0, 1), (1, 0)}
    assert_alias_equal(aliases[(0, 1)], ([0], [1]))

    # Rejects non-sparse matrices
    with pytest.raises(ValueError):
        graph.CSGraph(np.array([[0, 1], [0, 0]]), .5, 4)

    # Sparse inputs that must each trip an assertion
    rejected_rows = (
        [[0, 1], [0, 0]],  # directed graph
        [[0, 1], [2, 0]],  # weighted graph
        [[1, 1], [1, 0]],  # diagonal elements
    )
    for rows in rejected_rows:
        with pytest.raises(AssertionError):
            graph.CSGraph(sparse.csr_matrix(np.array(rows)), .5, 4)
예제 #3
0
def test_CSGraph_eq():
    """Check `graph.CSGraph` equality across matrices and the two parameters."""
    path_rows = [[0, 1, 0], [1, 0, 1], [0, 1, 0]]
    triangle_rows = [[0, 1, 1], [1, 0, 1], [1, 1, 0]]

    def build(rows, p, q):
        # Every graph gets its own freshly built sparse matrix
        return graph.CSGraph(sparse.csr_matrix(np.array(rows)), p, q)

    # Two graphs initialised with the same values are equal
    assert build(path_rows, 2, 4) == build(path_rows, 2, 4)

    # Two graphs initialised with different matrices are different
    assert build(triangle_rows, 2, 4) != build(path_rows, 2, 4)

    # Two graphs differing only in the second argument (p) are different
    assert build(path_rows, 2, 4) != build(path_rows, 3, 4)

    # Two graphs differing only in the third argument (q) are different
    assert build(path_rows, 2, 4) != build(path_rows, 2, 3)
예제 #4
0
def test_connected_component_or_none():
    """Check `batching.connected_component_or_none` on a three-component graph."""
    # Components: {0, 1, 2}, the isolated node {3}, and {4, 5}
    rows = [[0, 1, 1, 0, 0, 0],
            [1, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 1, 0]]
    g = graph.CSGraph(sparse.csr_matrix(np.array(rows)), 1, 1)

    # (source, limit, expected component); the third argument looks like a
    # size cap on the collected component -- here it is never exceeded
    not_maxed = [
        (0, 3, {0, 1, 2}),
        (1, 4, {0, 1, 2}),
        (2, 5, {0, 1, 2}),
        (3, 5, {3}),
        (4, 2, {4, 5}),
        (5, 3, {4, 5}),
    ]
    for source, limit, component in not_maxed:
        assert batching.connected_component_or_none(g, source, limit) == component

    # (source, limit) pairs that max out: None is returned instead of a set
    maxed = [(0, 2), (1, 2), (2, 2), (3, 0), (4, 1), (5, 1)]
    for source, limit in maxed:
        assert batching.connected_component_or_none(g, source, limit) is None
예제 #5
0
def test_batch_walks():
    """Check how many walks `batching.batch_walks` yields for a small graph."""
    # Path 0 - 1 - 2 - 3 plus the isolated node 4
    rows = [[0, 1, 0, 0, 0],
            [1, 0, 1, 0, 0],
            [0, 1, 0, 1, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0]]
    g = graph.CSGraph(sparse.csr_matrix(np.array(rows)), 1, 1)

    # Yields the right number of walks to span the whole graph
    n_walks = len(list(batching.batch_walks(g, 2, 2)))
    assert n_walks == 3
예제 #6
0
def test__collect_maxed_connected_component():
    """Check `batching._collect_maxed_connected_component` on a three-component graph."""
    # Components: {0, 1, 2}, the isolated node {3}, and {4, 5}
    rows = [[0, 1, 1, 0, 0, 0],
            [1, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 1, 0]]
    g = graph.CSGraph(sparse.csr_matrix(np.array(rows)), 1, 1)

    # (source, limit, expected collected set): not maxing out, so the call
    # returns a falsy value and fills the set passed as last argument
    not_maxed = [
        (0, 3, {0, 1, 2}),
        (1, 4, {0, 1, 2}),
        (2, 5, {0, 1, 2}),
        (3, 5, {3}),
        (4, 2, {4, 5}),
        (5, 3, {4, 5}),
    ]
    for source, limit, component in not_maxed:
        collected = set()
        maxed = batching._collect_maxed_connected_component(g, source, limit, collected)
        assert not maxed
        assert collected == component

    # (source, limit) pairs that max out: the call returns a truthy value
    maxing_out = [(0, 2), (1, 2), (2, 2), (3, 0), (4, 1), (5, 1)]
    for source, limit in maxing_out:
        assert batching._collect_maxed_connected_component(g, source, limit, set())

    # Raises an error if the source node has already been collected
    with pytest.raises(AssertionError):
        batching._collect_maxed_connected_component(g, 2, None, {2})
예제 #7
0
def test_epoch_batches(model_depth2):
    """Check `batching.epoch_batches` without and with sampling."""
    # Path 0 - 1 - 2 - 3 plus the isolated node 4
    rows = [[0, 1, 0, 0, 0],
            [1, 0, 1, 0, 0],
            [0, 1, 0, 1, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0]]
    g = graph.CSGraph(sparse.csr_matrix(np.array(rows)), 1, 1)

    # Feed keys expected in every batch, whatever the sampling setting
    expected_feed_keys = {
        'layer1a_adj/indices',
        'layer1a_adj/values',
        'layer1a_adj/dense_shape',
        'layer1a_output_mask',
        'layer1b_adj/indices',
        'layer1b_adj/values',
        'layer1b_adj/dense_shape',
        'layer1b_output_mask',
        'layer2_adj/indices',
        'layer2_adj/values',
        'layer2_adj/dense_shape',
        'layer2_output_mask',
    }

    def first_batch_size(sampling):
        # Number of final nodes in the first batch of a fresh epoch
        _, nodes, _ = next(batching.epoch_batches(model_depth2, g, 2, 2, sampling))
        return len(nodes)

    # Works well with good arguments: first no sampling (None), then sampling (1)
    for sampling in (None, 1):
        batches = list(batching.epoch_batches(model_depth2, g, 2, 2, sampling))
        assert len(batches) == 3

        _, final_nodes, feeds = batches[0]
        assert_array_equal(final_nodes, sorted(final_nodes))
        assert set(feeds.keys()) == expected_feed_keys

        # len == 3 if node 4 was in the seeds for batch 0.
        # Otherwise it can be 2, 3, or 4 depending on the overlap of the walks.
        assert len(final_nodes) in [2, 3, 4]

        # Make sure we span all possible lengths when iterating enough
        lengths = {first_batch_size(sampling) for _ in range(100)}
        assert lengths == {2, 3, 4}
예제 #8
0
def g():
    """Return a 6-node `graph.CSGraph` built with parameters .5 and 4."""
    rows = [
        [0, 1, 1, 1, 0, 0],
        [1, 0, 0, 1, 0, 1],
        [1, 0, 0, 1, 0, 1],
        [1, 1, 1, 0, 1, 0],
        [0, 0, 0, 1, 0, 1],
        [0, 1, 1, 0, 1, 0],
    ]
    dense = np.array(rows)
    return graph.CSGraph(sparse.csr_matrix(dense), .5, 4)