def testGetSimilarNodesToQueryNode(self):
    """Check the nodes reported as similar to the query node "n_7".

    Builds the characteristic matrix and the sketch matrix for the dummy
    graph, queries `get_similar_nodes` for "n_7" and compares the result
    with the expected array [0, 5, 7]. As the assertion message states, the
    sketch matrix is probabilistic, so an occasional failure is possible.
    """
    dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        dummy_hypergraph, r_in=3, r_out=2, r_all=0)
    nodes_count = dummy_hypergraph.number_of_nodes()
    ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
    sketch_matrix = SketchMatrix(25, 265, ch_matrix)

    similar_nodes_exp = np.array([0, 5, 7])
    similar_nodes, _ = similar_nodes_mining.get_similar_nodes(
        "n_7", dummy_hypergraph, sketch_matrix, 0, [], r_in=3, r_out=2, r_all=0)

    # np.array_equal returns a plain False when the shapes differ, so a
    # result of the wrong length fails the assertion with the message below
    # instead of depending on how `==` treats non-broadcastable operands.
    equality = np.array_equal(similar_nodes_exp, similar_nodes)
    self.assertTrue(
        equality,
        "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def testCharacteristicMatrix_JaccardSimMatrix(self):
    """Compare the Jaccard similarity matrix of the dummy graph with the stored expectation.

    The expected matrix is read from `self.ch_matrix_jaccard_sim_exp` and
    every entry must match exactly.
    """
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)

    jaccard_sim = characteristic.compute_jaccard_similarity_matrix()
    entrywise_match = self.ch_matrix_jaccard_sim_exp == jaccard_sim
    self.assertTrue(entrywise_match.all(),
                    "The computed Jaccard similarity matrix is wrong.")
def testCharacteristicMatrix_ReadWrite(self):
    """Save a characteristic matrix to a file, load it back and expect an equal object."""
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=2, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    saved_matrix = CharacteristicMatrix(rballs, node_total, wl_iterations=4)

    # Round trip through the temporary file used by this test.
    tmp_path = "test_files/characteristic_matrix.tmp"
    saved_matrix.save_to_file(tmp_path)
    loaded_matrix = CharacteristicMatrix.load_from_file(tmp_path)

    self.assertEqual(
        loaded_matrix,
        saved_matrix,
        "The read characteristic matrix is different from the saved one.")
def calculate_ch_matrix(): in_files = helpers.datasets[dataset]["files"] print "Converting RDF to NetworkX graph started at", time.strftime( time_format) start = time.time() graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False) print "Converting RDF to NetworkX graph took", time.time() - start, "s" print "-----------------------------------------" print "Saving NodeID map started at", time.strftime(time_format) start = time.time() inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset)) print "Saving NodeID map took", time.time() - start, "s" print "-----------------------------------------" print "Building hypergraph started at", time.strftime(time_format) start = time.time() hypergraph = Hypergraph(graph) print "Building hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Saving hypergraph started at", time.strftime(time_format) start = time.time() hypergraph.save_to_file(path + "{0}_hgraph".format(dataset)) print "Saving hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Building characteristic matrix started at", time.strftime( time_format) start = time.time() rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database( hypergraph, r_in=r_in, r_out=r_out, r_all=r_all) ch_matrix = CharacteristicMatrix(rballs_database, hypergraph.number_of_nodes(), wl_iterations=wl_iterations, print_progress=True) print "Building characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" print "Saving Column index to Node map started at", time.strftime( time_format) start = time.time() inout.save_to_file(index_node_map, path + "{0}_index_node_map".format(dataset)) print "Saving Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" print "Saving characteristic matrix started at", time.strftime(time_format) start = time.time() 
ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset)) print "Saving characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" return ch_matrix, hypergraph, index_node_map, node_id_map
def testCharacteristicMatrix(self):
    """Compare the sparse characteristic matrix of the dummy graph with `self.raw_ch_matrix_exp`."""
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)

    computed = characteristic.sparse_matrix
    self.assertEqual(
        self.raw_ch_matrix_exp,
        computed,
        "The computed characteristic matrix is wrong.")
def testSketchMatrix_ReadWrite(self):
    """Save a sketch matrix to a file, load it back and expect identical `matrix` contents."""
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=2, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=4)
    saved_sketch = SketchMatrix(5, 20, characteristic)

    # Round trip through the temporary file used by this test.
    tmp_path = "test_files/sketch_matrix.tmp"
    saved_sketch.save_to_file(tmp_path)
    loaded_sketch = SketchMatrix.load_from_file(tmp_path)

    entrywise_match = loaded_sketch.matrix == saved_sketch.matrix
    self.assertTrue(entrywise_match.all(),
                    "The read sketch matrix is different from the saved one.")
def testSimilarNodesMining(self):
    """Compare the sketch-based node similarity matrix with a thresholded Jaccard matrix.

    The expectation is the exact Jaccard similarity matrix thresholded at
    0.8 and cast to float32. As the assertion message states, the sketch
    matrix is probabilistic, so an occasional failure is possible.
    """
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)

    # Expected result, derived from the exact Jaccard similarities.
    jaccard_sim = characteristic.compute_jaccard_similarity_matrix()
    expected = np.array(jaccard_sim >= 0.8, dtype=np.float32)

    # Actual result, derived from the sketch matrix.
    sketch = SketchMatrix(25, 265, characteristic)
    actual = similar_nodes_mining.get_node_similarity_matrix(sketch)

    entrywise_match = expected == actual
    self.assertTrue(
        entrywise_match.all(),
        "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def testGetSimilarNodesToQueryNode(self):
    """Check the nodes reported as similar to the query node "n_7".

    Builds the characteristic matrix and the sketch matrix for the dummy
    graph, queries `get_similar_nodes` for "n_7" and compares the result
    with the expected array [0, 5, 7]. As the assertion message states, the
    sketch matrix is probabilistic, so an occasional failure is possible.
    """
    dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        dummy_hypergraph, r_in=3, r_out=2, r_all=0)
    nodes_count = dummy_hypergraph.number_of_nodes()
    ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
    sketch_matrix = SketchMatrix(25, 265, ch_matrix)

    similar_nodes_exp = np.array([0, 5, 7])
    similar_nodes, _ = similar_nodes_mining.get_similar_nodes(
        "n_7", dummy_hypergraph, sketch_matrix, 0, [], r_in=3, r_out=2, r_all=0)

    # np.array_equal returns a plain False when the shapes differ, so a
    # result of the wrong length fails the assertion with the message below
    # instead of depending on how `==` treats non-broadcastable operands.
    equality = np.array_equal(similar_nodes_exp, similar_nodes)
    self.assertTrue(
        equality,
        "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def testCharacteristicMatrix_JaccardSimMatrix(self):
    """Compare the Jaccard similarity matrix of the dummy graph with the stored expectation.

    The expected matrix is read from `self.ch_matrix_jaccard_sim_exp` and
    every entry must match exactly.
    """
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)

    jaccard_sim = characteristic.compute_jaccard_similarity_matrix()
    entrywise_match = self.ch_matrix_jaccard_sim_exp == jaccard_sim
    self.assertTrue(entrywise_match.all(),
                    "The computed Jaccard similarity matrix is wrong.")
def testCharacteristicMatrix_ReadWrite(self):
    """Save a characteristic matrix to a file, load it back and expect an equal object."""
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=2, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    saved_matrix = CharacteristicMatrix(rballs, node_total, wl_iterations=4)

    # Round trip through the temporary file used by this test.
    tmp_path = "test_files/characteristic_matrix.tmp"
    saved_matrix.save_to_file(tmp_path)
    loaded_matrix = CharacteristicMatrix.load_from_file(tmp_path)

    self.assertEqual(
        loaded_matrix,
        saved_matrix,
        "The read characteristic matrix is different from the saved one.")
def testSketchMatrix_ReadWrite(self):
    """Save a sketch matrix to a file, load it back and expect identical `matrix` contents."""
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=2, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=4)
    saved_sketch = SketchMatrix(5, 20, characteristic)

    # Round trip through the temporary file used by this test.
    tmp_path = "test_files/sketch_matrix.tmp"
    saved_sketch.save_to_file(tmp_path)
    loaded_sketch = SketchMatrix.load_from_file(tmp_path)

    entrywise_match = loaded_sketch.matrix == saved_sketch.matrix
    self.assertTrue(entrywise_match.all(),
                    "The read sketch matrix is different from the saved one.")
def calculate_ch_matrix(): in_files = helpers.datasets[dataset]["files"] print "Converting RDF to NetworkX graph started at", time.strftime(time_format) start = time.time() graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False) print "Converting RDF to NetworkX graph took", time.time() - start, "s" print "-----------------------------------------" print "Saving NodeID map started at", time.strftime(time_format) start = time.time() inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset)) print "Saving NodeID map took", time.time() - start, "s" print "-----------------------------------------" print "Building hypergraph started at", time.strftime(time_format) start = time.time() hypergraph = Hypergraph(graph) print "Building hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Saving hypergraph started at", time.strftime(time_format) start = time.time() hypergraph.save_to_file(path + "{0}_hgraph".format(dataset)) print "Saving hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Building characteristic matrix started at", time.strftime(time_format) start = time.time() rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=r_in, r_out=r_out, r_all=r_all) ch_matrix = CharacteristicMatrix(rballs_database, hypergraph.number_of_nodes(), wl_iterations=wl_iterations, print_progress=True) print "Building characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" print "Saving Column index to Node map started at", time.strftime(time_format) start = time.time() inout.save_to_file(index_node_map, path + "{0}_index_node_map".format(dataset)) print "Saving Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" print "Saving characteristic matrix started at", time.strftime(time_format) start = time.time() 
ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset)) print "Saving characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" return ch_matrix, hypergraph, index_node_map, node_id_map
def testSimilarNodesMining(self):
    """Compare the sketch-based node similarity matrix with a thresholded Jaccard matrix.

    The expectation is the exact Jaccard similarity matrix thresholded at
    0.8 and cast to float32. As the assertion message states, the sketch
    matrix is probabilistic, so an occasional failure is possible.
    """
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)

    # Expected result, derived from the exact Jaccard similarities.
    jaccard_sim = characteristic.compute_jaccard_similarity_matrix()
    expected = np.array(jaccard_sim >= 0.8, dtype=np.float32)

    # Actual result, derived from the sketch matrix.
    sketch = SketchMatrix(25, 265, characteristic)
    actual = similar_nodes_mining.get_node_similarity_matrix(sketch)

    entrywise_match = expected == actual
    self.assertTrue(
        entrywise_match.all(),
        "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def testCharacteristicMatrix(self):
    """Compare the sparse characteristic matrix of the dummy graph with `self.raw_ch_matrix_exp`."""
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)

    computed = characteristic.sparse_matrix
    self.assertEqual(
        self.raw_ch_matrix_exp,
        computed,
        "The computed characteristic matrix is wrong.")
def run_algorithm(graph, return_features=False, compute_string=True):
    '''Performs the algorithm proposed by Arnborg & Proskurowski on a graph with tree-width at most 3.

    Each pass applies rule 0 and then the first rule among rule 1, rule 2,
    rule 3 (only when compute_string is True) and rules 4-7 that modifies
    the hypergraph. The loop ends when none of them modifies it.

    :param graph: A NetworkX graph or a Hypergraph. A Hypergraph is copied before being reduced;
        anything else is passed to the Hypergraph constructor.
    :param return_features: (default False) If true, returns the features, which were reduced by the algorithm.
    :param compute_string: (default True) If True returns the canonical string representation of the graph.
        False means to perform the reduction rules without computing the canonical string.
    :return: A tuple of the form (tree_width, canonical_string[, reduced_features]).
        tree_width is 0 for a graph without nodes, otherwise the highest rule class (1, 2 or 3)
        that was applied. If edges remain after no rule applies, the result is
        (-1, u"Tree-width > 3"[, reduced_features]).
    '''
    def is_done(hypergraph):
        # The reduction is complete once every edge has been removed.
        return hypergraph.number_of_edges() == 0

    def collect_labels(hypergraph):
        # Canonical string: the first label of every remaining node, sorted
        # and comma-joined.
        labels = sorted(hypergraph.node[node]["labels"][0] for node in hypergraph.nodes_iter())
        return u",".join(labels)

    def minimal_labeling(hypergraph, group, endpoints, to_string, template=u"{0}"):
        # Shared by rule 0.2 and rule 3. For every ordering of the endpoints,
        # serialise each edge of the group with `to_string`, sort and
        # comma-join the strings and wrap the result in `template`. Returns
        # the smallest such label together with the set of all orderings
        # that produce it.
        candidates = []
        for perm in permutations(endpoints):
            parts = sorted(to_string(hypergraph, edge, perm) for edge in group)
            candidates.append((template.format(u",".join(parts)), perm))
        minimal_label = min(label for label, _ in candidates)
        direction = set(perm for label, perm in candidates if label == minimal_label)
        return minimal_label, direction

    def rule_0(hypergraph, compute_string):
        modified = False

        # (originally 1.3) - remove self-loops
        self_loops = list(hypergraph.self_loops)
        if self_loops:
            modified = True
            if compute_string:
                # Move the first label of each self-loop onto its node before
                # dropping the loop.
                for self_loop in self_loops:
                    node = hypergraph.endpoints(self_loop)[0]
                    hypergraph.add_node_label(node, hypergraph.edge(self_loop)["labels"][0])
                    hypergraph.remove_edge(self_loop)
            else:
                hypergraph.remove_edges_from(self_loops, unsafe=True)

        # rule 0.1 - collapse multiple node labels into a single one
        if compute_string:
            nodes_with_more_labels = list(hypergraph.nodes_with_more_labels)
            if nodes_with_more_labels:
                modified = True
                for node in nodes_with_more_labels:
                    labels = hypergraph.node[node]["labels"]
                    labels.sort()
                    new_label = u"(0.1;{0})".format(u",".join(labels))
                    hypergraph.set_node_labels(node, [new_label])
                hypergraph.reset_nodes_with_more_labels()

        # rule 0.2 - replace each group of parallel edges by a single edge
        parallel_edges_groups_keys = list(hypergraph.parallel_edges_groups.keys())
        if parallel_edges_groups_keys:
            modified = True
            for key in parallel_edges_groups_keys:
                edges_group = list(hypergraph.parallel_edges_groups[key])
                endpoints = hypergraph.endpoints(edges_group[0])
                if compute_string:
                    # The wrapped labels are the ones compared, so the
                    # template is applied before the minimum is taken.
                    minimal_label, direction = minimal_labeling(
                        hypergraph, edges_group, endpoints,
                        Hypergraph.edge_to_string, u"(0.2;{0})")
                    hypergraph.remove_edges_from(edges_group, unsafe=True)
                    hypergraph.add_edge(endpoints, direction, minimal_label)
                else:
                    hypergraph.remove_edges_from(edges_group, unsafe=True)
                    hypergraph.add_edge(endpoints, set(), "")
            hypergraph.reset_parallel_edges_groups()

        return modified

    def rule_1(hypergraph, return_features=False, compute_string=True):
        modified = False
        pendant_features = ReducibleFeature.extract_rule_1_features(hypergraph)
        if return_features:
            # Materialise so the features can be both reduced and returned.
            pendant_features = list(pendant_features)
        affected_nodes = set()
        for feature in pendant_features:
            modified = True
            feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        return modified, pendant_features if return_features else None

    def rule_2(hypergraph, return_features=False, compute_string=True):
        modified = False
        series_features = ReducibleFeature.extract_rule_2_features(hypergraph)
        if return_features:
            series_features = list(series_features)
        affected_nodes = set()
        new_edges = set()
        for feature in series_features:
            modified = True
            new_edges |= feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
        hypergraph.update_parallel_edges_groups(new_edges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        return modified, series_features if return_features else None

    def rule_3(hypergraph):
        modified = False
        parallel_hedges_groups_keys = list(hypergraph.parallel_hedges_groups.keys())
        if parallel_hedges_groups_keys:
            modified = True
            for key in parallel_hedges_groups_keys:
                # Snapshot the group, as rule 0.2 does, so that indexing and
                # the removal below do not operate on the hypergraph's own
                # container. NOTE(review): presumably the groups are updated
                # when edges are removed - confirm against Hypergraph.
                hedges_group = list(hypergraph.parallel_hedges_groups[key])
                endpoints = hypergraph.endpoints(hedges_group[0])
                # Here the raw labels are compared; the "(3;...)" wrapper is
                # applied only to the winner.
                minimal_label, direction = minimal_labeling(
                    hypergraph, hedges_group, endpoints, Hypergraph.hedge_to_string)
                hypergraph.remove_edges_from(hedges_group, unsafe=True)
                hypergraph.add_edge(endpoints, direction, u"(3;{0})".format(minimal_label))
            hypergraph.reset_parallel_hedges_groups()
        return modified

    def rules_4_5_6_7(hypergraph, return_features=False, compute_string=True):
        modified = False
        degree_3_features = ReducibleFeature.extract_degree_3_features(hypergraph)
        if return_features:
            degree_3_features = list(degree_3_features)
        affected_nodes = set()
        new_edges = set()
        for feature in degree_3_features:
            modified = True
            new_edges |= feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
        # Edge ids starting with "he_" are tracked as parallel hedges; the
        # rest are tracked as ordinary parallel edges.
        new_hedges = set(edge_id for edge_id in new_edges if edge_id.startswith(u"he_"))
        hypergraph.update_parallel_edges_groups(new_edges - new_hedges)
        hypergraph.update_parallel_hedges_groups(new_hedges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        return modified, degree_3_features if return_features else None

    if isinstance(graph, Hypergraph):
        hypergraph = graph.copy()
    else:
        hypergraph = Hypergraph(graph)

    features = []
    treewidth = 0

    def result(tree_width, canonical_string):
        # Shape the return value according to return_features.
        if return_features:
            return tree_width, canonical_string, features
        return tree_width, canonical_string

    if hypergraph.number_of_nodes() == 0:
        # Unicode literal, consistent with every other return path.
        return result(treewidth, u"")

    new_features = []
    while True:
        if return_features:
            features += new_features

        # no need to check if modified here to continue, just go to the next rule after
        rule_0(hypergraph, compute_string)

        modified, new_features = rule_1(hypergraph, return_features, compute_string)
        if modified:
            treewidth = max(treewidth, 1)
            continue

        modified, new_features = rule_2(hypergraph, return_features, compute_string)
        if modified:
            treewidth = max(treewidth, 2)
            continue

        # Rule 3 only merges labels of parallel hedges, so it is skipped when
        # no canonical string is requested; it yields no features.
        if compute_string and rule_3(hypergraph):
            new_features = []
            continue

        modified, new_features = rules_4_5_6_7(hypergraph, return_features, compute_string)
        if modified:
            treewidth = max(treewidth, 3)
            continue

        # No rule modified the hypergraph in this pass.
        if not is_done(hypergraph):
            if return_features:
                features += new_features
            return result(-1, u"Tree-width > 3")

        if hypergraph.number_of_nodes() == 0:
            sys.stderr.write("\n[ArnborgProskurowski] Error: empty graph produced.")
            return result(treewidth, u"")

        canon_str = collect_labels(hypergraph) if compute_string else u""
        if return_features:
            features += new_features
        return result(treewidth, canon_str)