def test_random_walk_edge_chain_pattern(rng, chain_length):
    """
    Test subsampling a chain sub-graph.

    The source graph is layered: for every level ``v`` in
    ``range(chain_length)`` each vertex ``(v, i)`` is linked to each vertex
    ``(v + 1, j)``, with ``i`` and ``j`` ranging over ``multiplier`` values.
    Sampling with in/out degrees capped at 1 is expected to return a simple
    chain of ``chain_length`` edges.

    Args:
        rng: random generator handed over to ``random_walk_edge_sample``.
        chain_length: number of edges expected in the sampled chain.
    """
    n_iter = max(20, 100 * chain_length)
    multiplier = 4

    graph = DirectedGraph()
    for v in range(chain_length):
        for i in range(multiplier):
            for j in range(multiplier):
                graph.add_edge((v, i), (v + 1, j))

    subgraph = random_walk_edge_sample(
        graph,
        rng,
        n_iter,
        n_seeds=1,  # any starting point in the chain
        use_opposite=True,
        use_both_ends=True,
        max_in_degree=1,
        max_out_degree=1)

    # Assert graph is a chain of expected length.
    assert len(subgraph.edges) == chain_length
    # The degree checks must go through all(): asserting a bare generator
    # expression always succeeds because a generator object is truthy.
    assert all(0 <= d <= 1 for _, d in subgraph.out_degree())
    assert all(0 <= d <= 1 for _, d in subgraph.in_degree())

    # Find vertices - sorting here should retrieve the chaining
    vertices = sorted(subgraph.nodes)
    assert all(
        v in {(i, m) for m in range(multiplier)}
        for i, v in enumerate(vertices))

    # Inspect linkage: consecutive sorted vertices must be joined by an edge.
    assert all(e in subgraph.edges for e in zip(vertices, vertices[1:]))
def test_dataset(initializer_dict: Dict, expected_graph: DirectedGraph):
    """Check that the wn18 dataset format is read and formatted properly.

    Only the path where embedding vectors are read from a file (e.g.
    word2vec pre-computed embeddings) is covered. Default randomly generated
    embeddings are not tested because of their trivial implementation and
    intrinsic randomness, which makes them harder to test.

    Args:
        initializer_dict (Dict): Raw content of the wn18 files and of the
            embedding dictionary to write in the temporary directory
        expected_graph (DirectedGraph): Expected graph output

    TODO: add edge cases like multiple empty lines, escape characters etc.
    """
    split_names = ('train', 'valid', 'test')

    with TempDirectory() as d:
        # The same raw graph is used for each of the three splits.
        for split in split_names:
            d.write(raw_dataset_pat[split], initializer_dict['wn18_graph'])

        # Pre-computed id mappings.
        for mapping in ('entity2id', 'relation2id'):
            d.write(preproc_pat[mapping], initializer_dict[mapping])

        # Pickled embedding dictionary.
        w2v_dict_path = os.path.join(d.path, preproc_pat['word2vec_short'])
        with open(w2v_dict_path, 'wb') as f:
            pkl.dump(initializer_dict['w2v_dic'], f)

        ds: Dataset = Dataset(d.path, 'wn18', node2vec_path=w2v_dict_path)

        # Every split must give back the expected graph (compared as text).
        for split in split_names:
            graph: DirectedGraph = DirectedGraph(getattr(ds, split))
            assert str(graph) == str(expected_graph)
def test_dualize_relations(graph: DirectedGraph,
                           expected_graph: DirectedGraph) -> None:
    """
    Check that ``graph.dualize_relations()`` compares equal to
    ``expected_graph``.
    """
    assert graph.dualize_relations() == expected_graph
def test_prune(graph: DirectedGraph, node_to_prune: Hashable,
               expected_graph: DirectedGraph) -> None:
    """
    Check that ``graph.prune(node_to_prune)`` compares equal to
    ``expected_graph``.
    """
    assert graph.prune(node_to_prune) == expected_graph
def test_rand_prune(graph: DirectedGraph, pruning_factor: float,
                    seed: int) -> None:
    """
    Check that random pruning is reproducible: two generators seeded with
    the same value must lead to equal pruned graphs.
    """

    def seeded_generator() -> random.Random:
        # independent generator, reset to the requested seed
        generator = random.Random()
        generator.seed(seed)
        return generator

    # two distinct generators sharing the same seed
    generators = [seeded_generator(), seeded_generator()]

    # prune the graph once per generator, in order
    pruned_graphs = [
        graph.rand_prune(pruning_factor, random_generator=generator)
        for generator in generators
    ]

    assert pruned_graphs[0] == pruned_graphs[1]
def test_integerify(graph: DirectedGraph, expected_graph: DirectedGraph):
    """
    Check that ``graph.integerify()`` compares equal to ``expected_graph``.

    An isomorphism test would be preferable but is complicated, so the exact
    integers assigned to the nodes are compared instead. This makes it a
    regression test.
    """
    assert graph.integerify() == expected_graph
def test_stringify(graph: DirectedGraph, expected_graph: DirectedGraph):
    """
    Verify that stringification yields a consistent graph.

    Ideally we would want a test of isomorphism, but it is complicated, so
    it tests if the stringification yields the correct strings for each
    node. Hence this is a regression test.

    Args:
        graph: graph to stringify.
        expected_graph: graph the stringified result must compare equal to.
    """
    stringified_graph = graph.stringify()

    # The implicit ``__class__`` cell only exists for functions defined
    # inside a class body; at module level it is an undefined name, so the
    # owning class is referenced explicitly.
    allowed_chars = TestDirectedGraph.ALLOWED_CHARS
    assert all(set(node) <= allowed_chars for node in stringified_graph)

    assert stringified_graph == expected_graph
def test_random_walk_edge_star_pattern(rng, max_degree):
    """
    Test subsampling on a star: every vertex ``v`` of
    ``range(1, max_degree * 2)`` has one edge ``v -> 0``.
    """
    # n_iter is derived before the fallback below replaces a non-positive
    # max_degree.
    n_iter = max(100, max_degree * 100)
    if max_degree <= 0:
        max_degree = 10

    star = DirectedGraph()
    for spoke in range(1, max_degree * 2):
        star.add_edge(spoke, 0)

    # With use_opposite disabled the sample is expected to stay limited to
    # the seed edge, with or without use_both_ends.
    for both_ends in (False, True):
        sampled = random_walk_edge_sample(star,
                                          rng,
                                          n_iter,
                                          n_seeds=1,
                                          use_opposite=False,
                                          use_both_ends=both_ends)
        assert len(sampled.edges) == 1  # only the seed

    # Here we activate matching of edges *->0
    # => we can sample almost all the graph
    sampled = random_walk_edge_sample(star,
                                      rng,
                                      n_iter,
                                      n_seeds=1,
                                      use_opposite=True,
                                      use_both_ends=False,
                                      max_out_degree=1,
                                      max_in_degree=max_degree)
    assert len(sampled.edges) == min(max_degree, n_iter)
def flush(self) -> None:
    """
    Reset the structure: the stored graph is replaced by a brand new, empty
    ``DirectedGraph``, which drops all composite arrows.
    """
    empty_graph = DirectedGraph()
    self._graph = empty_graph
def test_random_walk_edge_functional_pattern(rng):
    """
    Minimal test case for the different sampling head and direction.

    The graph is small enough to enable precise testing of the options.

        4 -> 0 -> 1 -> 2
        5 <- 0    1 <- 3
    """
    n_iter = 100  # override fixture to stabilize the tests
    seed_edge = (0, 1)

    def build(edges):
        # Build a DirectedGraph by adding the given edges in order.
        built = DirectedGraph()
        for tail, head in edges:
            built.add_edge(tail, head)
        return built

    graph = build([(0, 1), (1, 2), (3, 1), (4, 0), (0, 5)])

    # (use_opposite, use_both_ends, edges expected in the sample)
    scenarios = [
        # Step 1 - from (0, 1), we should sample only edges starting from 1
        (False, False, [(0, 1), (1, 2)]),
        # Step 2 - from (0, 1), we should sample only edges starting
        # from 0 or 1
        (False, True, [(0, 1), (1, 2), (0, 5)]),
        # Step 3 - opposite matching on, single end
        (True, False, [(0, 1), (1, 2), (3, 1)]),
    ]
    for opposite, both_ends, edges in scenarios:
        expected_subgraph = build(edges)
        subgraph = random_walk_edge_sample(graph,
                                           rng,
                                           n_iter,
                                           seeds=[seed_edge],
                                           use_opposite=opposite,
                                           use_both_ends=both_ends)
        assert subgraph == expected_subgraph

    # Step 4 - use_opposite=True, use_both_ends=True => catch all
    subgraph = random_walk_edge_sample(graph,
                                       rng,
                                       n_iter,
                                       seeds=[seed_edge],
                                       use_opposite=True,
                                       use_both_ends=True)
    assert subgraph == graph
def test_subgraph(graph: DirectedGraph, nodes_set: Set[Hashable],
                  expected_graph: DirectedGraph):
    """
    Check that ``graph.subgraph(nodes_set)`` compares equal to
    ``expected_graph``.
    """
    extracted = graph.subgraph(nodes_set)
    assert extracted == expected_graph
def test_over(graph: DirectedGraph, node: Hashable,
              nodes_set: Set[Hashable]) -> None:
    """
    Check that ``graph.over(node)`` returns exactly the nodes of
    ``nodes_set`` (compared as a frozenset).
    """
    found = graph.over(node)
    wanted = frozenset(nodes_set)
    assert found == wanted
class TestDirectedGraph:
    """
    Unit tests for DirectedGraph class.

    ``params`` maps each test method name to a list of keyword-argument
    dictionaries for that method.
    NOTE(review): presumably consumed by a parametrization hook such as
    ``pytest_generate_tests`` defined outside this class - confirm.
    """
    # allowed chars for stringification
    ALLOWED_CHARS = (frozenset(digits) | frozenset(ascii_letters))

    # parameters for tests
    params: Dict[str, List[Any]] = {
        # second case: a list used as a child is unhashable, the test expects
        # a TypeError (signalled by expected_dict=None)
        "test_init": [
            dict(initializer_dict={
                0: [0, "1"],
                2.: [()]
            },
                 expected_dict={
                     0: frozenset({0, "1"}),
                     "1": frozenset(),
                     2: frozenset({()}),
                     (): frozenset()
                 }),
            dict(initializer_dict={
                0: [1],
                1: [[]]
            }, expected_dict=None)
        ],
        "test_op": [
            dict(initializer_dict={
                0: [1],
                1: []
            },
                 expected_op=DirectedGraph({
                     0: [],
                     1: [0]
                 })),
            dict(initializer_dict={
                0: [1],
                1: [0]
            },
                 expected_op=DirectedGraph({
                     0: [1],
                     1: [0]
                 })),
            dict(initializer_dict={
                0: [],
                1: []
            },
                 expected_op=DirectedGraph({
                     0: [],
                     1: []
                 }))
        ],
        # second case: node 1 is absent, the test expects a KeyError
        "test_delitem": [
            dict(graph=DirectedGraph({
                0: [],
                1: [0, 1],
                2: [1]
            }),
                 node_to_remove=1,
                 expected_graph=DirectedGraph({
                     0: [],
                     2: []
                 })),
            dict(graph=DirectedGraph({}),
                 node_to_remove=1,
                 expected_graph=DirectedGraph({}))
        ],
        "test_setitem": [
            dict(graph=DirectedGraph({
                0: [],
                1: [],
                2: []
            }),
                 node_to_add=0,
                 children=[1, 2],
                 expected_graph=DirectedGraph({
                     0: [1, 2],
                     1: [],
                     2: []
                 })),
            dict(graph=DirectedGraph({
                1: [],
                2: []
            }),
                 node_to_add=0,
                 children=[1, 2],
                 expected_graph=DirectedGraph({
                     0: [1, 2],
                     1: [],
                     2: []
                 })),
            dict(graph=DirectedGraph({}),
                 node_to_add=0,
                 children=[1, 2],
                 expected_graph=DirectedGraph({
                     0: [1, 2],
                     1: [],
                     2: []
                 }))
        ],
        "test_len": [
            dict(graph=DirectedGraph({}), expected_length=0),
            dict(graph=DirectedGraph({
                0: [],
                1: []
            }), expected_length=2),
            dict(graph=DirectedGraph({
                0: [1],
                1: []
            }), expected_length=2)
        ],
        # NOTE(review): in the second case ``{}`` is an empty dict, not an
        # empty set; Counter({}) is empty as well, so the comparison in
        # test_iter still holds.
        "test_iter": [
            dict(graph=DirectedGraph({
                0: [],
                1: [1],
                2: []
            }),
                 nodes_set={0, 1, 2}),
            dict(graph=DirectedGraph({}), nodes_set={})
        ],
        "test_under": [
            dict(graph=DirectedGraph({
                0: [1, 2],
                1: [],
                2: []
            }),
                 node=0,
                 nodes_set={1, 2}),
            dict(graph=DirectedGraph({
                0: [1, 2],
                1: [],
                2: [3, 1],
                3: [4],
                4: []
            }),
                 node=0,
                 nodes_set={1, 2, 3, 4})
        ],
        "test_over": [
            dict(graph=DirectedGraph({
                0: [],
                1: [0],
                2: [0]
            }),
                 node=0,
                 nodes_set={1, 2}),
            dict(graph=DirectedGraph({
                0: [],
                1: [0, 2],
                2: [0],
                3: [2],
                4: [3]
            }),
                 node=0,
                 nodes_set={1, 2, 3, 4})
        ],
        "test_subgraph": [
            dict(graph=DirectedGraph({
                0: [],
                1: [],
                2: []
            }),
                 nodes_set={0},
                 expected_graph=DirectedGraph({0: []})),
            dict(graph=DirectedGraph({
                0: [0, 1],
                1: [],
                2: [0, 1]
            }),
                 nodes_set={0, 1},
                 expected_graph=DirectedGraph({
                     0: [0, 1],
                     1: []
                 }))
        ],
        # the same pair of operand graphs is used for every operator; the
        # expected graphs are keyed by pairs of nodes
        "test_binary_operator": [
            dict(binary_op=or_,
                 first_graph=DirectedGraph({
                     0: [1],
                     1: []
                 }),
                 second_graph=DirectedGraph({
                     0: [],
                     1: [1]
                 }),
                 expected_graph=DirectedGraph({
                     (0, 0): [(0, 1)],
                     (0, 1): [],
                     (1, 0): [],
                     (1, 1): [(1, 1)]
                 })),
            dict(binary_op=and_,
                 first_graph=DirectedGraph({
                     0: [1],
                     1: []
                 }),
                 second_graph=DirectedGraph({
                     0: [],
                     1: [1]
                 }),
                 expected_graph=DirectedGraph({
                     (0, 0): [],
                     (0, 1): [(1, 1)],
                     (1, 0): [],
                     (1, 1): []
                 })),
            dict(binary_op=add,
                 first_graph=DirectedGraph({
                     0: [1],
                     1: []
                 }),
                 second_graph=DirectedGraph({
                     0: [],
                     1: [1]
                 }),
                 expected_graph=DirectedGraph({
                     (0, 0): [(0, 1), (1, 0), (1, 1)],
                     (0, 1): [(1, 0), (1, 1)],
                     (1, 0): [],
                     (1, 1): [(1, 1)]
                 })),
            dict(binary_op=matmul,
                 first_graph=DirectedGraph({
                     0: [1],
                     1: []
                 }),
                 second_graph=DirectedGraph({
                     0: [],
                     1: [1]
                 }),
                 expected_graph=DirectedGraph({
                     (0, 0): [(1, 0)],
                     (1, 0): [],
                     (0, 1): [(1, 1), (0, 1)],
                     (1, 1): [(1, 1)]
                 })),
            # NOTE(review): (0, 1) is listed twice among the children of
            # (0, 1) below - confirm whether the duplicate is intended.
            dict(binary_op=mul,
                 first_graph=DirectedGraph({
                     0: [1],
                     1: []
                 }),
                 second_graph=DirectedGraph({
                     0: [],
                     1: [1]
                 }),
                 expected_graph=DirectedGraph({
                     (0, 0): [(1, 0)],
                     (1, 0): [],
                     (0, 1): [(0, 1), (0, 1), (1, 1)],
                     (1, 1): [(0, 1), (1, 1)]
                 }))
        ],
        "test_prune": [
            dict(graph=DirectedGraph({
                0: [1],
                1: [2, 3],
                2: [],
                3: [],
                4: [0, 1]
            }),
                 node_to_prune=1,
                 expected_graph=DirectedGraph({
                     0: [2, 3],
                     2: [],
                     3: [],
                     4: [0, 2, 3]
                 }))
        ],
        "test_integerify": [
            dict(graph=DirectedGraph({
                (0, 0): [],
                "1": []
            }),
                 expected_graph=DirectedGraph({
                     0: [],
                     1: []
                 })),
            dict(graph=DirectedGraph({
                0.5: ["a"],
                "a": [],
                2: [0.5, "a"]
            }),
                 expected_graph=DirectedGraph({
                     2: [1],
                     1: [],
                     0: [2, 1]
                 }))
        ],
        "test_stringify": [
            dict(graph=DirectedGraph({
                (0, 0): [],
                "1": []
            }),
                 expected_graph=DirectedGraph({
                     "0x0": [],
                     "0x1": []
                 })),
            dict(graph=DirectedGraph({
                0.5: ["a"],
                "a": [],
                2: [0.5, "a"]
            }),
                 expected_graph=DirectedGraph({
                     "0x2": ["0x1"],
                     "0x1": [],
                     "0x0": ["0x2", "0x1"]
                 }))
        ],
        # NOTE(review): only ``graph`` is given here; ``pruning_factor`` and
        # ``seed`` must be supplied from elsewhere (not visible in this
        # class) - confirm.
        "test_rand_prune": [
            dict(graph=DirectedGraph({
                0: [1, 2, 3, 4],
                2: [3, 4, 6],
                6: [5, 9, 11]
            })),
            dict(graph=DirectedGraph({0: []})),
            dict(graph=DirectedGraph({}))
        ],
        "test_dualize_relations": [
            dict(graph=DirectedGraph({
                0: [1],
                1: []
            }),
                 expected_graph=DirectedGraph({
                     0: [1],
                     1: []
                 })),
            dict(graph=DirectedGraph({
                0: {
                    1: {
                        "label": 1
                    }
                },
                1: []
            }),
                 expected_graph=DirectedGraph({
                     0: {
                         1: {
                             ("label", False): 1,
                             ("label", True): 1
                         }
                     },
                     1: []
                 }))
        ]
    }

    @staticmethod
    def test_init(initializer_dict: Dict[Hashable, Iterable[Hashable]],
                  expected_dict: Optional[Dict[Hashable,
                                               FrozenSet[Hashable]]]):
        """
        Check that initialization builds the correct graphs by adding
        missing nodes as keys.

        If expected_dict is None, a TypeError is expected because the
        initializer_dict is invalid (some nodes are not hashable).
        """
        if expected_dict is None:
            with pytest.raises(TypeError):
                graph: DirectedGraph = DirectedGraph[Hashable](
                    initializer_dict)
        else:
            graph = DirectedGraph[Hashable](initializer_dict)
            # compare children as frozensets so that their order is ignored
            dict_of_graph = {
                node: frozenset(children)
                for node, children in graph.items()
            }
            assert dict_of_graph == expected_dict

    @staticmethod
    def test_op(initializer_dict: Dict[Hashable, Iterable[Hashable]],
                expected_op: DirectedGraph) -> None:
        """
        Test that the ``op`` attribute of a freshly built graph (its
        opposite) equals expected_op.
        """
        graph = DirectedGraph[Hashable](initializer_dict)
        graph_op = graph.op
        assert graph_op == expected_op

    @staticmethod
    def test_delitem(graph: DirectedGraph, node_to_remove: Hashable,
                     expected_graph: DirectedGraph):
        """
        Test that node deletion works as intended: deleting an existing node
        yields expected_graph, deleting a missing node raises KeyError.
        """
        # copy graph
        # NOTE(review): copy() is a shallow copy; whether the copy is
        # independent from the shared fixture depends on DirectedGraph's
        # copy support - confirm.
        graph_copy = copy(graph)
        # remove node from copy
        if node_to_remove in graph:
            del graph_copy[node_to_remove]
            # check the new keys of the graph are well defined
            assert graph_copy == expected_graph
        else:
            with pytest.raises(KeyError):
                # deleting non existing key should raise a key error
                del graph_copy[node_to_remove]

    @staticmethod
    def test_setitem(graph: DirectedGraph, node_to_add: Hashable,
                     children: Iterable[Hashable],
                     expected_graph: DirectedGraph) -> None:
        """
        Test that resetting the list of children of a node works as intended
        """
        graph_copy = copy(graph)
        # reset list associated to node
        graph_copy[node_to_add] = children
        # verify that graph and its opposite have expected value
        assert graph_copy == expected_graph
        assert graph_copy.op == expected_graph.op

    @staticmethod
    def test_len(graph: DirectedGraph, expected_length: int) -> None:
        """
        Check that the length of the graph and its opposite are right
        """
        assert len(graph) == expected_length
        assert len(graph.op) == expected_length

    @staticmethod
    def test_iter(graph: DirectedGraph, nodes_set: Set) -> None:
        """
        Test that iterating over the nodes of the graph goes through all of
        the necessary nodes, once only
        """
        # Counter comparison also detects nodes yielded more than once
        assert Counter(iter(graph)) == Counter(nodes_set)

    @staticmethod
    def test_under(graph: DirectedGraph, node: Hashable,
                   nodes_set: Set[Hashable]) -> None:
        """
        Tests that the under method returns the required set of nodes
        """
        assert graph.under(node) == frozenset(nodes_set)

    @staticmethod
    def test_over(graph: DirectedGraph, node: Hashable,
                  nodes_set: Set[Hashable]) -> None:
        """
        Tests that the over method returns the required set of nodes
        """
        assert graph.over(node) == frozenset(nodes_set)

    @staticmethod
    def test_subgraph(graph: DirectedGraph, nodes_set: Set[Hashable],
                      expected_graph: DirectedGraph):
        """
        Test that subgraph extraction gives the right graph
        """
        assert graph.subgraph(nodes_set) == expected_graph

    @staticmethod
    def test_binary_operator(binary_op: Callable[[Any, Any], Any],
                             first_graph: DirectedGraph,
                             second_graph: DirectedGraph,
                             expected_graph: DirectedGraph) -> None:
        """
        Verify that the given binary operator applied to first_graph and
        second_graph yields expected_graph, with the exact same type.
        """
        result_graph = binary_op(first_graph, second_graph)
        result_type = type(result_graph)
        expected_type = type(expected_graph)
        # exact type match is required, not just an instance check
        assert result_type == expected_type
        assert result_graph == expected_graph

    @staticmethod
    def test_prune(graph: DirectedGraph, node_to_prune: Hashable,
                   expected_graph: DirectedGraph) -> None:
        """
        Verify that pruning the given node from graph yields the required
        graph
        """
        result_graph = graph.prune(node_to_prune)
        assert result_graph == expected_graph

    @staticmethod
    def test_integerify(graph: DirectedGraph, expected_graph: DirectedGraph):
        """
        Verify that integerification yields a consistent graph.

        Ideally we would want a test of isomorphism, but it is complicated,
        so it tests if the integerification yields the correct integers for
        each node. Hence this is a regression test.
        """
        integerified_graph = graph.integerify()
        assert integerified_graph == expected_graph

    @staticmethod
    def test_stringify(graph: DirectedGraph, expected_graph: DirectedGraph):
        """
        Verify that stringification yields a consistent graph.

        Ideally we would want a test of isomorphism, but it is complicated,
        so it tests if the stringification yields the correct strings for
        each node. Hence this is a regression test.
        """
        stringified_graph = graph.stringify()
        # __class__ is the implicit reference to the enclosing class, which
        # is available because this function is defined in a class body;
        # every node name may only use characters of ALLOWED_CHARS
        assert all(
            set(node) <= __class__.ALLOWED_CHARS for node in stringified_graph)  # type: ignore # pylint: disable=undefined-variable
        assert stringified_graph == expected_graph

    @staticmethod
    def test_rand_prune(graph: DirectedGraph, pruning_factor: float,
                        seed: int) -> None:
        """
        Test that random pruning gives the same result with same seeding
        """
        # initialize two random generators with the same seed
        first_random_generator = random.Random()
        first_random_generator.seed(seed)
        second_random_generator = random.Random()
        second_random_generator.seed(seed)
        # prune graph using the two different random generators
        first_pruned_graph = graph.rand_prune(
            pruning_factor, random_generator=first_random_generator)
        second_pruned_graph = graph.rand_prune(
            pruning_factor, random_generator=second_random_generator)
        assert first_pruned_graph == second_pruned_graph

    @staticmethod
    def test_dualize_relations(graph: DirectedGraph,
                               expected_graph: DirectedGraph) -> None:
        """
        Test that dualize_relations yields expected_graph
        """
        result = graph.dualize_relations()
        assert result == expected_graph
class TestDataset:
    """
    Unit tests for Dataset class.

    entity2id and relation2id are provided and not generated through Dataset
    due to the difficulty of managing multithreading randomness and mapping
    consistency.
    """
    params: Dict[str, List[Any]] = {
        'test_dataset': [
            dict(initializer_dict={
                'wn18_graph': (b'a\t_member_of_domain_usage\tb\n'
                               b'b\t_verb_group\tc\n'
                               b'a\t_member_of_domain_region\td\n'
                               b'd\t_member_meronym\tc\n'),
                'w2v_dic': {
                    'a': [0.55, 0.45, 0.35],
                    'b': [0.55, 0.45, 0.35],
                    'c': [0.55, 0.45, 0.35],
                    'd': [0.55, 0.45, 0.35]
                },
                'entity2id': b'a\t0\nb\t1\nc\t2\nd\t3\n',
                'relation2id': (b'_member_of_domain_usage\t0\n'
                                b'_verb_group\t1\n'
                                b'_member_of_domain_region\t2\n'
                                b'_member_meronym\t3\n')
            },
                 expected_graph=DirectedGraph(((0, 1, {
                     0: None
                 }), (1, 2, {
                     1: None
                 }), (0, 3, {
                     2: None
                 }), (3, 2, {
                     3: None
                 })))),
        ]
    }

    @staticmethod
    def test_dataset(initializer_dict: Dict, expected_graph: DirectedGraph):
        """Check that the wn18 dataset format is read and formatted properly.

        Only the path where embedding vectors are read from a file (e.g.
        word2vec pre-computed embeddings) is covered. Default randomly
        generated embeddings are not tested because of their trivial
        implementation and intrinsic randomness, which makes them harder to
        test.

        Args:
            initializer_dict (Dict): Raw content of the wn18 files and of the
                embedding dictionary to write in the temporary directory
            expected_graph (DirectedGraph): Expected graph output

        TODO: add edge cases like multiple empty lines, escape characters
        etc.
        """
        split_names = ('train', 'valid', 'test')

        with TempDirectory() as d:
            # The same raw graph is used for each of the three splits.
            for split in split_names:
                d.write(raw_dataset_pat[split],
                        initializer_dict['wn18_graph'])

            # Pre-computed id mappings.
            for mapping in ('entity2id', 'relation2id'):
                d.write(preproc_pat[mapping], initializer_dict[mapping])

            # Pickled embedding dictionary.
            w2v_dict_path = os.path.join(d.path,
                                         preproc_pat['word2vec_short'])
            with open(w2v_dict_path, 'wb') as f:
                pkl.dump(initializer_dict['w2v_dic'], f)

            ds: Dataset = Dataset(d.path,
                                  'wn18',
                                  node2vec_path=w2v_dict_path)

            # Every split must give back the expected graph (as text).
            for split in split_names:
                graph: DirectedGraph = DirectedGraph(getattr(ds, split))
                assert str(graph) == str(expected_graph)