def testGetSimilarNodesToQueryNode(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=0)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similar_nodes_exp = np.array([0, 5, 7])
     similar_nodes, _ = similar_nodes_mining.get_similar_nodes(
         "n_7",
         dummy_hypergraph,
         sketch_matrix,
         0, [],
         r_in=3,
         r_out=2,
         r_all=0)
     equality = similar_nodes_exp == similar_nodes
     if type(equality) is not bool:
         equality = equality.all()
     self.assertTrue(
         equality,
         "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
     )
 def testCharacteristicMatrix_JaccardSimMatrix(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
     ch_matrix_jaccard_sim = ch_matrix.compute_jaccard_similarity_matrix()
     equality = (self.ch_matrix_jaccard_sim_exp == ch_matrix_jaccard_sim).all()
     self.assertTrue(equality, "The computed Jaccard similarity matrix is wrong.")
 def testCharacteristicMatrix_ReadWrite(self):
     file_name = "test_files/characteristic_matrix.tmp"
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=2, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=4)
     ch_matrix.save_to_file(file_name)
     read_ch_matrix = CharacteristicMatrix.load_from_file(file_name)
     self.assertEqual(read_ch_matrix, ch_matrix, "The read characteristic matrix is different from the saved one.")
def calculate_ch_matrix():
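    # Note: relies on module-level configuration defined elsewhere in this script
    # (dataset, path, time_format, r_in, r_out, r_all, wl_iterations) and on the
    # helpers, rdf and inout modules.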
    in_files = helpers.datasets[dataset]["files"]

    print "Converting RDF to NetworkX graph started at", time.strftime(
        time_format)
    start = time.time()
    graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files,
                                                     discard_classes=False)
    print "Converting RDF to NetworkX graph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving NodeID map started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset))
    print "Saving NodeID map took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Building hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph = Hypergraph(graph)
    print "Building hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph.save_to_file(path + "{0}_hgraph".format(dataset))
    print "Saving hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Building characteristic matrix started at", time.strftime(
        time_format)
    start = time.time()
    rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=r_in, r_out=r_out, r_all=r_all)
    ch_matrix = CharacteristicMatrix(rballs_database,
                                     hypergraph.number_of_nodes(),
                                     wl_iterations=wl_iterations,
                                     print_progress=True)
    print "Building characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving Column index to Node map started at", time.strftime(
        time_format)
    start = time.time()
    inout.save_to_file(index_node_map,
                       path + "{0}_index_node_map".format(dataset))
    print "Saving Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset))
    print "Saving characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"

    return ch_matrix, hypergraph, index_node_map, node_id_map
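
# Usage sketch (illustrative only; assumes the module-level configuration above is set):
#   ch_matrix, hypergraph, index_node_map, node_id_map = calculate_ch_matrix()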
 def testCharacteristicMatrix(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=0)
     self.assertEqual(self.raw_ch_matrix_exp, ch_matrix.sparse_matrix,
                      "The computed characteristic matrix is wrong.")
 def testSketchMatrix_ReadWrite(self):
     file_name = "test_files/sketch_matrix.tmp"
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=2, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=4)
     sketch_matrix = SketchMatrix(5, 20, ch_matrix)
     sketch_matrix.save_to_file(file_name)
     read_sketch_matrix = SketchMatrix.load_from_file(file_name)
     equality = (read_sketch_matrix.matrix == sketch_matrix.matrix).all()
     self.assertTrue(equality, "The read sketch matrix is different from the saved one.")
 def testSimilarNodesMining(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
     ch_matrix_jaccard_sim = ch_matrix.compute_jaccard_similarity_matrix()
     similarity_matrix_exp = np.array(ch_matrix_jaccard_sim >= 0.8, dtype=np.float32)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similarity_matrix = similar_nodes_mining.get_node_similarity_matrix(sketch_matrix)
     equality = (similarity_matrix_exp == similarity_matrix).all()
     self.assertTrue(equality, "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def run_algorithm(graph, return_features=False, compute_string=True):
    '''Performs the algorithm proposed by Arnborg & Proskurowski on a graph of tree-width at most 3.
    :param graph: A NetworkX graph or a Hypergraph.
    :param return_features: (default False) If True, also returns the features
    that were reduced by the algorithm.
    :param compute_string: (default True) If True, returns the canonical string
    representation of the graph; if False, only performs the reduction rules
    without computing the canonical string.
    :return: A tuple of the form (tree_width, canonical_string[, reduced_features]).
    If the graph has tree-width greater than 3, tree_width is -1 and
    canonical_string is u"Tree-width > 3".
    '''
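    # Usage sketch (illustrative only; assumes `g` is a labelled NetworkX graph):
    #   tree_width, canon_str = run_algorithm(g)
    #   tree_width, canon_str, features = run_algorithm(g, return_features=True)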
    def is_done(hypergraph):
        return hypergraph.number_of_edges() == 0
    
    def collect_labels(hypergraph):
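        # Builds the final canonical string from the first label of every remaining node.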
        labels = []
        
        for node in hypergraph.nodes_iter():
            labels.append(hypergraph.node[node]["labels"][0])
        
        labels.sort()
        
        return u",".join(labels)
    
    def rule_0(hypergraph, compute_string):
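        # Normalization pass: remove self-loops (originally rule 1.3), folding their
        # labels into the node when the canonical string is computed; merge multiple
        # node labels (rule 0.1); and collapse each group of parallel edges into a
        # single canonically labelled edge (rule 0.2).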
        modified = False
        
        # (originally 1.3) - remove self-loops
        self_loops = list(hypergraph.self_loops)
        if len(self_loops) > 0:
            modified = True
        if compute_string:
            for self_loop in self_loops:
                node = hypergraph.endpoints(self_loop)[0]
                hypergraph.add_node_label(node, hypergraph.edge(self_loop)["labels"][0])
                hypergraph.remove_edge(self_loop)
        else:
            hypergraph.remove_edges_from(self_loops, unsafe=True)
        
        # rule 0.1
        if compute_string:
            nodes_with_more_labels = list(hypergraph.nodes_with_more_labels)
            if len(nodes_with_more_labels) > 0:
                modified = True
            
            for node in nodes_with_more_labels:
                labels = hypergraph.node[node]["labels"]
                labels.sort()
                new_label = u"(0.1;{0})".format(u",".join(labels))
                hypergraph.set_node_labels(node, [new_label])
            
            hypergraph.reset_nodes_with_more_labels()
        
        # rule 0.2
        parallel_edges_groups_keys = list(hypergraph.parallel_edges_groups.keys())
        
        if len(parallel_edges_groups_keys) > 0:
            modified = True
        
        for key in parallel_edges_groups_keys:
            edges_group = list(hypergraph.parallel_edges_groups[key])
            endpoints = hypergraph.endpoints(edges_group[0])
            if compute_string:
                perms = permutations(endpoints)
                possible_labels = []
                for perm in perms:
                    possible_label = {}
                    possible_label["perm"] = perm
                    possible_label["label"] = []
                    for edge in edges_group:
                        possible_label["label"].append(Hypergraph.edge_to_string(hypergraph, edge, perm))
                    possible_label["label"].sort()
                    possible_label["label"] = u"(0.2;{0})".format(u",".join(possible_label["label"]))
                    possible_labels.append(possible_label)
                possible_labels = sorted(possible_labels, key=lambda element: element["label"])
                minimal_label = possible_labels[0]["label"]
                minimal_perm_indices = filter(lambda i: possible_labels[i]["label"] == minimal_label, range(len(possible_labels)))
                direction = set([possible_labels[i]["perm"] for i in minimal_perm_indices])
                hypergraph.remove_edges_from(edges_group, unsafe=True)
                hypergraph.add_edge(endpoints, direction, minimal_label)
            else:
                hypergraph.remove_edges_from(edges_group, unsafe=True)
                hypergraph.add_edge(endpoints, set(), "")
        
        hypergraph.reset_parallel_edges_groups()
        
        return modified
        
    def rule_1(hypergraph, return_features=False, compute_string=True):
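        # Reduces pendant (degree-1) features; in the main loop this implies tree-width >= 1.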
        modified = False
        pendant_features = ReducibleFeature.extract_rule_1_features(hypergraph)
        if return_features:
            pendant_features = list(pendant_features)
        
        affected_nodes = set()
        
        for feature in pendant_features:
            if not modified:
                modified = True
            feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
        
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        
        return modified, pendant_features if return_features else None
    
    def rule_2(hypergraph, return_features=False, compute_string=True):
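        # Reduces series (degree-2) features; in the main loop this implies tree-width >= 2.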
        modified = False
        series_features = ReducibleFeature.extract_rule_2_features(hypergraph)
        if return_features:
            series_features = list(series_features)
        
        affected_nodes = set()
        new_edges = set()
        
        for feature in series_features:
            if not modified:
                modified = True
            _new_edges = feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
            new_edges |= _new_edges
        
        hypergraph.update_parallel_edges_groups(new_edges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        
        return modified, series_features if return_features else None
    
    def rule_3(hypergraph):
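        # Collapses each group of parallel hyperedges into a single hyperedge with a
        # canonical label; only called when the canonical string is computed.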
        modified = False
        
        parallel_hedges_groups_keys = list(hypergraph.parallel_hedges_groups.keys())
        
        if len(parallel_hedges_groups_keys) > 0:
            modified = True
        
        for key in parallel_hedges_groups_keys:
            hedges_group = hypergraph.parallel_hedges_groups[key]
            endpoints = hypergraph.endpoints(hedges_group[0])
            perms = permutations(endpoints)
            possible_labels = []
            for perm in perms:
                possible_label = {}
                possible_label["perm"] = perm
                possible_label["label"] = []
                for hedge in hedges_group:
                    possible_label["label"].append(Hypergraph.hedge_to_string(hypergraph, hedge, perm))
                possible_label["label"].sort()
                possible_label["label"] = u",".join(possible_label["label"])
                possible_labels.append(possible_label)
            possible_labels = sorted(possible_labels, key=lambda element: element["label"])
            minimal_label = possible_labels[0]["label"]
            minimal_perm_indices = filter(lambda i: possible_labels[i]["label"] == minimal_label, range(len(possible_labels)))
            direction = set([possible_labels[i]["perm"] for i in minimal_perm_indices])
            hypergraph.remove_edges_from(hedges_group, unsafe=True)
            hypergraph.add_edge(endpoints, direction, u"(3;{0})".format(minimal_label))
        
        hypergraph.reset_parallel_hedges_groups()
        
        return modified
    
    def rules_4_5_6_7(hypergraph, return_features=False, compute_string=True):
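        # Reduces degree-3 features (rules 4-7); in the main loop this implies tree-width >= 3.
        # Newly created hyperedges (ids starting with "he_") are tracked separately from ordinary edges.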
        modified = False
        degree_3_features = ReducibleFeature.extract_degree_3_features(hypergraph)
        if return_features:
            degree_3_features = list(degree_3_features)
        
        affected_nodes = set()
        new_edges = set()
        
        for feature in degree_3_features:
            if not modified:
                modified = True
            _new_edges = feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
            new_edges |= _new_edges
        
        new_hedges = set(filter(lambda edge_id: edge_id.startswith(u"he_"), new_edges))
        
        hypergraph.update_parallel_edges_groups(new_edges - new_hedges)
        hypergraph.update_parallel_hedges_groups(new_hedges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        
        return modified, degree_3_features if return_features else None
    
    if type(graph) is not Hypergraph:
        hypergraph = Hypergraph(graph)
    else:
        hypergraph = graph.copy()
    
    features = []
    treewidth = 0
    
    if hypergraph.number_of_nodes() == 0:
        if return_features:
            return treewidth, u"", features
        else:
            return treewidth, u""
    
    new_features = []
            
    while True:
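        # Apply the rules repeatedly until the graph is fully reduced (no edges left)
        # or no rule modifies it any more (tree-width > 3).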
        modified = False
        
        if return_features:
            features += new_features
        
#         hypergraph.visualize()
        
        # No need to check the modified flag after rule_0; just proceed to the next rule.
        rule_0(hypergraph, compute_string)

        modified, new_features = rule_1(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 1:
                treewidth = 1
            continue

        modified, new_features = rule_2(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 2:
                treewidth = 2
            continue
        
        if compute_string:
            modified = rule_3(hypergraph)
            if modified:
                new_features = []
                continue

        modified, new_features = rules_4_5_6_7(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 3:
                treewidth = 3
            continue
        else:
            if is_done(hypergraph):
                if hypergraph.number_of_nodes() == 0:
                    sys.stderr.write("\n[ArnborgProskurowski] Error: empty graph produced.")
                    if return_features:
                        return treewidth, u"", features
                    else:
                        return treewidth, u""
                else:
                    canon_str = collect_labels(hypergraph) if compute_string else u""
                    if return_features:
                        features += new_features
                        return treewidth, canon_str, features
                    else:
                        return treewidth, canon_str
            else:
                if return_features:
                    features += new_features
                    return -1, u"Tree-width > 3", features
                else:
                    return -1, u"Tree-width > 3"