def testGetSimilarNodesToQueryNode(self):
     """Check the nodes reported as similar to the query node "n_7".

     A characteristic matrix is built from the r-ball database of the
     dummy graph (r_in=3, r_out=2, r_all=0, wl_iterations=0), sketched
     with SketchMatrix(25, 265, ...) and queried through
     similar_nodes_mining.get_similar_nodes.  The first element of the
     returned pair is expected to equal [0, 5, 7].

     The sketch matrix is probabilistic, so this test may fail
     sporadically and pass on a re-run.
     """
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=0)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similar_nodes_exp = np.array([0, 5, 7])
     similar_nodes, _ = similar_nodes_mining.get_similar_nodes(
         "n_7",
         dummy_hypergraph,
         sketch_matrix,
         0, [],
         r_in=3,
         r_out=2,
         r_all=0)
     # np.array_equal yields a plain False when the shapes differ.  An
     # elementwise `==` on mismatched shapes is deprecated in NumPy and
     # raises in recent releases, which would turn a test failure into
     # an error without the explanatory message below.
     equality = np.array_equal(similar_nodes_exp, similar_nodes)
     self.assertTrue(
         equality,
         "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
     )
Ejemplo n.º 2
0
    def testRBallHyper_CenterDefaultColor(self):
        """Check r-ball extraction around "n_10" with center_default_color=True.

        Radius-2 r-balls are extracted for edge_dir -1, 1 and 0 and compared
        (up to isomorphism) with the stored reference r-balls, whose center
        node labels are overwritten with ["0"] beforehand.
        """
        source = Hypergraph(example_graphs.gt_dummy_graph)

        # Extraction order is kept as: -1, then 1, then 0.
        extracted = {}
        for edge_dir in (-1, 1, 0):
            extracted[edge_dir] = algorithms.r_ball_hyper(
                source, "n_10", 2, edge_dir, center_default_color=True)

        expected = {}
        expected[0] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_all)
        expected[1] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_out)
        expected[-1] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_in)

        # The reference r-balls get the default center label as well.
        for edge_dir in (0, 1, -1):
            expected[edge_dir].node["n_10"]["labels"] = ["0"]

        # All isomorphism checks run before the first assertion.
        verdicts = [
            algorithms.isomorphic(expected[edge_dir], extracted[edge_dir])
            for edge_dir in (0, 1, -1)
        ]
        messages = (
            "Problem extracting r-ball with edge_dir=0.",
            "Problem extracting r-ball with edge_dir=1.",
            "Problem extracting r-ball with edge_dir=-1.",
        )
        for verdict, message in zip(verdicts, messages):
            self.assertTrue(verdict, message)
Ejemplo n.º 3
0
 def testHypergraph_edges_iter(self):
     """Check Hypergraph.edges_iter with zero, one and two node arguments."""
     hgraph = Hypergraph(example_graphs.gt_dummy_graph)

     # No arguments: the dummy graph is expected to yield 32 edges.
     every_edge = list(hgraph.edges_iter())
     self.assertEqual(len(every_edge), 32)

     # One node argument.
     edges_for_n6 = set(hgraph.edges_iter("n_6"))
     self.assertEqual(edges_for_n6, set(["e_5", "e_9", "e_13", "e_28"]))

     # Two node arguments.
     edges_for_pair = set(hgraph.edges_iter("n_5", "n_1"))
     self.assertEqual(edges_for_pair, set(["e_15"]))
Ejemplo n.º 4
0
 def testHypergraph_subgraph_with_labels(self):
     """Check that subgraph_with_labels matches the stored reference subgraph."""
     selected_nodes = set(["n_1", "n_6", "n_9", "n_10"])
     hgraph = Hypergraph(example_graphs.gt_dummy_graph)
     extracted = hgraph.subgraph_with_labels(selected_nodes)
     matches_reference = algorithms.isomorphic(
         example_graphs.gt_dummy_subgraph, extracted)
     self.assertTrue(matches_reference,
                     "Incorrect subgraph extraction from hypergraph.")
 def testCharacteristicMatrix_JaccardSimMatrix(self):
     """Compare the computed Jaccard similarity matrix with the fixture.

     The expected matrix is read from self.ch_matrix_jaccard_sim_exp.
     """
     hgraph = Hypergraph(example_graphs.snm_dummy_graph)
     database, _ = similar_nodes_mining.extract_rballs_database(
         hgraph, r_in=3, r_out=2, r_all=0)
     matrix = CharacteristicMatrix(database,
                                   hgraph.number_of_nodes(),
                                   wl_iterations=0)
     computed = matrix.compute_jaccard_similarity_matrix()
     elementwise = self.ch_matrix_jaccard_sim_exp == computed
     self.assertTrue(elementwise.all(),
                     "The computed Jaccard similarity matrix is wrong.")
Ejemplo n.º 6
0
 def testHypergraph_ReadWrite(self):
     """Save a Hypergraph to disk, load it back and expect equality."""
     target_path = "test_files/dummy_hypergraph.tmp"
     original = Hypergraph(example_graphs.gt_dummy_graph)
     original.save_to_file(target_path)
     restored = Hypergraph.load_from_file(target_path)
     self.assertEqual(original, restored,
                      "The read hypergraph is different from the saved one.")
 def testCharacteristicMatrix_ReadWrite(self):
     """Save a CharacteristicMatrix to disk, load it back and expect equality."""
     target_path = "test_files/characteristic_matrix.tmp"
     hgraph = Hypergraph(example_graphs.snm_dummy_graph)
     database, _ = similar_nodes_mining.extract_rballs_database(
         hgraph, r_in=2, r_out=2, r_all=0)
     original = CharacteristicMatrix(database,
                                     hgraph.number_of_nodes(),
                                     wl_iterations=4)
     original.save_to_file(target_path)
     restored = CharacteristicMatrix.load_from_file(target_path)
     self.assertEqual(
         restored, original,
         "The read characteristic matrix is different from the saved one.")
def calculate_ch_matrix():
    in_files = helpers.datasets[dataset]["files"]

    print "Converting RDF to NetworkX graph started at", time.strftime(
        time_format)
    start = time.time()
    graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files,
                                                     discard_classes=False)
    print "Converting RDF to NetworkX graph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving NodeID map started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset))
    print "Saving NodeID map took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Building hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph = Hypergraph(graph)
    print "Building hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph.save_to_file(path + "{0}_hgraph".format(dataset))
    print "Saving hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Building characteristic matrix started at", time.strftime(
        time_format)
    start = time.time()
    rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=r_in, r_out=r_out, r_all=r_all)
    ch_matrix = CharacteristicMatrix(rballs_database,
                                     hypergraph.number_of_nodes(),
                                     wl_iterations=wl_iterations,
                                     print_progress=True)
    print "Building characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving Column index to Node map started at", time.strftime(
        time_format)
    start = time.time()
    inout.save_to_file(index_node_map,
                       path + "{0}_index_node_map".format(dataset))
    print "Saving Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset))
    print "Saving characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"

    return ch_matrix, hypergraph, index_node_map, node_id_map
 def testCharacteristicMatrix(self):
     """Compare the built characteristic matrix with the stored fixture.

     The expected value is read from self.raw_ch_matrix_exp and compared
     with the `sparse_matrix` attribute of the built CharacteristicMatrix.
     """
     hgraph = Hypergraph(example_graphs.snm_dummy_graph)
     database, _ = similar_nodes_mining.extract_rballs_database(
         hgraph, r_in=3, r_out=2, r_all=0)
     built = CharacteristicMatrix(database,
                                  hgraph.number_of_nodes(),
                                  wl_iterations=0)
     self.assertEqual(self.raw_ch_matrix_exp, built.sparse_matrix,
                      "The computed characteristic matrix is wrong.")
 def testSimilarNodesMining(self):
     """Compare the sketch-based similarity matrix with the exact one.

     The expected matrix is the exact Jaccard similarity matrix thresholded
     at 0.8 and cast to float32.  The sketch matrix is probabilistic, so
     this test may fail sporadically.
     """
     hgraph = Hypergraph(example_graphs.snm_dummy_graph)
     database, _ = similar_nodes_mining.extract_rballs_database(
         hgraph, r_in=3, r_out=2, r_all=0)
     characteristic = CharacteristicMatrix(database,
                                           hgraph.number_of_nodes(),
                                           wl_iterations=0)
     exact_similarity = characteristic.compute_jaccard_similarity_matrix()
     expected = np.array(exact_similarity >= 0.8, dtype=np.float32)
     sketch = SketchMatrix(25, 265, characteristic)
     estimated = similar_nodes_mining.get_node_similarity_matrix(sketch)
     self.assertTrue(
         (expected == estimated).all(),
         "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
 def testSketchMatrix_ReadWrite(self):
     """Save a SketchMatrix to disk, load it back and compare the matrices."""
     target_path = "test_files/sketch_matrix.tmp"
     hgraph = Hypergraph(example_graphs.snm_dummy_graph)
     database, _ = similar_nodes_mining.extract_rballs_database(
         hgraph, r_in=2, r_out=2, r_all=0)
     characteristic = CharacteristicMatrix(database,
                                           hgraph.number_of_nodes(),
                                           wl_iterations=4)
     original = SketchMatrix(5, 20, characteristic)
     original.save_to_file(target_path)
     restored = SketchMatrix.load_from_file(target_path)
     # Only the `matrix` attributes of the two objects are compared.
     same_content = (restored.matrix == original.matrix).all()
     self.assertTrue(
         same_content,
         "The read sketch matrix is different from the saved one.")
Ejemplo n.º 12
0
 def testDropEdgesByProbability(self):
     """Check that drop_edges_by_probability keeps roughly (1 - p) of the edges.

     For each probability p in 0.0 .. 1.0 (step 0.1) the function is applied
     both to the Hypergraph and to the raw example graph.  The fraction of
     remaining edges, measured against the hypergraph's original edge
     count, must be within 0.2 of the expected proportion.
     """
     dummy_hypergraph = Hypergraph(example_graphs.gt_dummy_graph)
     edges_count = dummy_hypergraph.number_of_edges()
     msg = "The proportion of edges remaining after being dropping deviate too much from the expected."
     for p in [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]:
         new_hypergraph = algorithms.drop_edges_by_probability(dummy_hypergraph, p)
         new_graph = algorithms.drop_edges_by_probability(example_graphs.gt_dummy_graph, p)
         edges_prop_exp = ((1. - p) * edges_count) / float(edges_count)
         edges_prop_hyper = float(new_hypergraph.number_of_edges()) / float(edges_count)
         edges_prop = float(new_graph.number_of_edges()) / float(edges_count)
         # assertAlmostEquals is a deprecated alias (removed in Python 3.12);
         # assertAlmostEqual is the supported spelling and accepts `delta`
         # since Python 2.7.
         self.assertAlmostEqual(edges_prop_exp, edges_prop_hyper, delta=0.2, msg=msg)
         self.assertAlmostEqual(edges_prop_exp, edges_prop, delta=0.2, msg=msg)
 def testGetSimilarNodesToQueryNode(self):
     """Check the nodes reported as similar to the query node "n_7".

     A characteristic matrix is built from the r-ball database of the
     dummy graph (r_in=3, r_out=2, r_all=0, wl_iterations=0), sketched
     with SketchMatrix(25, 265, ...) and queried through
     similar_nodes_mining.get_similar_nodes.  The first element of the
     returned pair is expected to equal [0, 5, 7].

     The sketch matrix is probabilistic, so this test may fail
     sporadically and pass on a re-run.
     """
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similar_nodes_exp = np.array([0, 5, 7])
     similar_nodes, _ = similar_nodes_mining.get_similar_nodes("n_7", dummy_hypergraph, sketch_matrix, 0, [], r_in=3, r_out=2, r_all=0)
     # np.array_equal yields a plain False when the shapes differ.  An
     # elementwise `==` on mismatched shapes is deprecated in NumPy and
     # raises in recent releases, which would turn a test failure into
     # an error without the explanatory message below.
     equality = np.array_equal(similar_nodes_exp, similar_nodes)
     self.assertTrue(equality, "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
 def testCharacteristicMatrix_JaccardSimMatrix(self):
     """Compare the computed Jaccard similarity matrix with the fixture.

     The expected matrix is read from self.ch_matrix_jaccard_sim_exp.
     """
     radii = dict(r_in=3, r_out=2, r_all=0)
     source = Hypergraph(example_graphs.snm_dummy_graph)
     rballs, _ = similar_nodes_mining.extract_rballs_database(source, **radii)
     node_total = source.number_of_nodes()
     characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)
     jaccard = characteristic.compute_jaccard_similarity_matrix()
     agrees = (self.ch_matrix_jaccard_sim_exp == jaccard).all()
     self.assertTrue(agrees, "The computed Jaccard similarity matrix is wrong.")
 def testWShinglesExtraction(self):
     """Check the w-shingles extracted from two example hypergraphs.

     Both graphs are processed with wl_iterations=1; the Weisfeiler-Lehman
     state returned by the first extraction is passed to the second.  The
     two shingle sets and their intersection are compared with fixtures.
     """
     first_graph = Hypergraph(example_graphs.w_shingles_graph_1)
     second_graph = Hypergraph(example_graphs.w_shingles_graph_2)
     expected_first = set([
         u'_1.0,', u'2;(wl', u',1)))', u'1.2),', u',a),b', u'))),w',
         u',(1.2', u'2),(1', u'(x,((', u'(0,1)', u';(x,(', u',((0,',
         u'2;(x,', u'))),a', u'(1.2;', u'0,((0', u'),(1.', u'.2;(x',
         u';(wl_', u'.2;(w', u',a),(', u'l_1.2', u'l_1.1', u'l_1.0',
         u')),a)', u'),a),', u'1;(1.', u'((0,1', u'_1.1)', u')),wl',
         u'1.0,(', u'(0.1;', u'_1.2)', u'wl_1.', u'1))),', u'0,1))',
         u'.2),(', u'0.1;(', u'2),wl', u'),wl_', u'.0,((', u'a),b)',
         u'x,((0', u'a),(1', u'.1;(1', u'(wl_1', u'1.2;(', u',wl_1',
         u';(1.2', u'.2),w'
     ])
     expected_second = set([
         u'y,((1', u'2;(wl', u'1.4),', u',1)))', u'(1,0)', u'1.2),',
         u'_1.0,', u'))),w', u',(1.2', u'2),(1', u'(x,((', u'(0,1)',
         u'_1.4)', u';(x,(', u',((0,', u'2;(x,', u'))),a', u'(1.2;',
         u'))),c', u'0,((0', u'1,0))', u'),(1.', u'_1.5)', u',0)))',
         u'.2;(y', u';(wl_', u'.2;(w', u',a),(', u'l_1.5', u'l_1.4',
         u'l_1.3', u'l_1.2', u'c),b)', u'l_1.0', u')),a)', u'),a),',
         u'1;(1.', u'((0,1', u'3,((1', u',((1,', u'(y,((', u';(y,(',
         u'1.0,(', u'(0.1;', u'_1.2)', u'wl_1.', u'1))),', u'0,1))',
         u'.2),(', u'0.1;(', u'),wl_', u'.0,((', u'),c),', u'x,((0',
         u'1.3,(', u'a),(1', u',c),b', u'.4),w', u'4),wl', u'.2;(x',
         u'.1;(1', u'_1.3,', u'0))),', u'.3,((', u')),wl', u'(wl_1',
         u')),c)', u'1.2;(', u',wl_1', u'2;(y,', u';(1.2', u'((1,0'
     ])
     expected_common = set([
         u')),wl', u'_1.0,', u'.1;(1', u'1.0,(', u'2;(wl', u'_1.2)',
         u',1)))', u',wl_1', u'1.2),', u'wl_1.', u'1))),', u'0,1))',
         u'.2),(', u'))),w', u'0.1;(', u',(1.2', u'2),(1', u'.0,((',
         u'(x,((', u'(0,1)', u';(x,(', u'(0.1;', u',((0,', u'2;(x,',
         u'))),a', u'(1.2;', u'0,((0', u'),(1.', u'.2;(x', u';(wl_',
         u'a),(1', u'.2;(w', u',a),(', u'x,((0', u'l_1.2', u'l_1.0',
         u'(wl_1', u')),a)', u'),a),', u'1;(1.', u'((0,1', u'1.2;(',
         u'),wl_', u';(1.2'
     ])

     # The first call starts without any state; the second call continues
     # from the state produced by the first.
     found_first, shared_state = shingle_extraction.extract_w_shingles(
         first_graph, wl_iterations=1, wl_state=None)
     found_second, shared_state = shingle_extraction.extract_w_shingles(
         second_graph, wl_iterations=1, wl_state=shared_state)

     extraction_message = "Wrong w-shingles were extracted from hypergraph."
     self.assertEqual(expected_first, found_first, extraction_message)
     self.assertEqual(expected_second, found_second, extraction_message)
     found_common = found_first & found_second
     self.assertEqual(
         expected_common, found_common,
         "The intersection of the two sets of w-shingles is incorrect.")
 def testCharacteristicMatrix_ReadWrite(self):
     """Round-trip a CharacteristicMatrix through a file and expect equality."""
     radii = dict(r_in=2, r_out=2, r_all=0)
     storage = "test_files/characteristic_matrix.tmp"
     source = Hypergraph(example_graphs.snm_dummy_graph)
     rballs, _ = similar_nodes_mining.extract_rballs_database(source, **radii)
     node_total = source.number_of_nodes()
     written = CharacteristicMatrix(rballs, node_total, wl_iterations=4)
     written.save_to_file(storage)
     reloaded = CharacteristicMatrix.load_from_file(storage)
     self.assertEqual(reloaded, written, "The read characteristic matrix is different from the saved one.")
Ejemplo n.º 17
0
def load_ch_matrix():
    print "Reading NodeID map started at", time.strftime(time_format)
    start = time.time()
    node_id_map = inout.load_from_file(path + "{0}_node_id_map".format(dataset))
    print "Reading NodeID map took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Reading hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph = Hypergraph.load_from_file(path + "{0}_hgraph".format(dataset))
    print "Reading hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Reading characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    ch_matrix = CharacteristicMatrix.load_from_file(path + "{0}_ch_matrix".format(dataset))
    print "Reading characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Reading Column index to Node map started at", time.strftime(time_format)
    start = time.time()
    index_node_map = inout.load_from_file(path + "{0}_index_node_map".format(dataset))
    print "Reading Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"
    
    return ch_matrix, hypergraph, index_node_map, node_id_map
 def testSketchMatrix_ReadWrite(self):
     """Round-trip a SketchMatrix through a file and compare the matrices."""
     radii = dict(r_in=2, r_out=2, r_all=0)
     storage = "test_files/sketch_matrix.tmp"
     source = Hypergraph(example_graphs.snm_dummy_graph)
     rballs, _ = similar_nodes_mining.extract_rballs_database(source, **radii)
     node_total = source.number_of_nodes()
     characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=4)
     written = SketchMatrix(5, 20, characteristic)
     written.save_to_file(storage)
     reloaded = SketchMatrix.load_from_file(storage)
     # Only the `matrix` attributes of the two objects are compared.
     elementwise = reloaded.matrix == written.matrix
     self.assertTrue(elementwise.all(), "The read sketch matrix is different from the saved one.")
Ejemplo n.º 19
0
 def rule_3(hypergraph):
     """Replace every group of parallel hyperedges by one labelled edge.

     For each group, every permutation of the group's endpoints produces a
     candidate label: the sorted, comma-joined string forms of the group's
     hyperedges under that permutation.  The smallest candidate label is
     kept, and every permutation that produces it forms the new edge's
     direction set.  The group's edges are removed and one edge labelled
     "(3;<minimal label>)" is added.  The parallel-hedges bookkeeping is
     reset at the end.

     :param hypergraph: the hypergraph to reduce in place.
     :return: True if at least one group of parallel hyperedges existed.
     """
     group_keys = list(hypergraph.parallel_hedges_groups.keys())
     modified = len(group_keys) > 0

     for group_key in group_keys:
         group = hypergraph.parallel_hedges_groups[group_key]
         # The endpoints are taken from the group's first hyperedge.
         endpoints = hypergraph.endpoints(group[0])

         candidates = []
         for ordering in permutations(endpoints):
             hedge_strings = [
                 Hypergraph.hedge_to_string(hypergraph, hedge, ordering)
                 for hedge in group
             ]
             candidates.append({
                 "perm": ordering,
                 "label": u",".join(sorted(hedge_strings)),
             })

         minimal_label = min(candidate["label"] for candidate in candidates)
         direction = set(candidate["perm"] for candidate in candidates
                         if candidate["label"] == minimal_label)

         hypergraph.remove_edges_from(group, unsafe=True)
         hypergraph.add_edge(endpoints, direction,
                             u"(3;{0})".format(minimal_label))

     hypergraph.reset_parallel_hedges_groups()

     return modified
def load_ch_matrix():
    print "Reading NodeID map started at", time.strftime(time_format)
    start = time.time()
    node_id_map = inout.load_from_file(path +
                                       "{0}_node_id_map".format(dataset))
    print "Reading NodeID map took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Reading hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph = Hypergraph.load_from_file(path + "{0}_hgraph".format(dataset))
    print "Reading hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Reading characteristic matrix started at", time.strftime(
        time_format)
    start = time.time()
    ch_matrix = CharacteristicMatrix.load_from_file(
        path + "{0}_ch_matrix".format(dataset))
    print "Reading characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Reading Column index to Node map started at", time.strftime(
        time_format)
    start = time.time()
    index_node_map = inout.load_from_file(path +
                                          "{0}_index_node_map".format(dataset))
    print "Reading Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"

    return ch_matrix, hypergraph, index_node_map, node_id_map
Ejemplo n.º 21
0
def calculate_ch_matrix():
    in_files = helpers.datasets[dataset]["files"]
    
    print "Converting RDF to NetworkX graph started at", time.strftime(time_format)
    start = time.time()
    graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False)
    print "Converting RDF to NetworkX graph took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving NodeID map started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset))
    print "Saving NodeID map took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Building hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph = Hypergraph(graph)
    print "Building hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph.save_to_file(path + "{0}_hgraph".format(dataset))
    print "Saving hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Building characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=r_in, r_out=r_out, r_all=r_all)
    ch_matrix = CharacteristicMatrix(rballs_database, hypergraph.number_of_nodes(), wl_iterations=wl_iterations, print_progress=True)
    print "Building characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving Column index to Node map started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(index_node_map, path + "{0}_index_node_map".format(dataset))
    print "Saving Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset))
    print "Saving characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"
    
    return ch_matrix, hypergraph, index_node_map, node_id_map
Ejemplo n.º 22
0
def r_ball_hyper(hypergraph, center, r, edge_dir=0, center_default_color=False):
    '''The same as r_ball but for Hypergraph.

    Builds a new Hypergraph containing the nodes and edges reached from
    `center` by a depth-limited recursive traversal of `hypergraph`.

    :param hypergraph: the source graph; must be exactly of type Hypergraph.
    :param center: the node the traversal starts from.
    :param r: the depth limit; with r <= 0 only the center node is added.
    :param edge_dir: passed as dir_code to hypergraph.edges_iter_dir and as
    the direction argument of rball.has_edge.
    :param center_default_color: if True, the center node is added with the
    labels ["0"] instead of a copy of its attributes in `hypergraph`.
    :return: the extracted r-ball as a new Hypergraph.
    '''
    assert type(hypergraph) is Hypergraph
    
    visited_nodes = set()
    
    def recurse(u, i):
        # u is the node being expanded, i its depth (the center has depth 1).
        # NOTE(review): the traversal is depth-first and marks nodes visited
        # on first arrival, so a node first reached at depth r is never
        # expanded even if a shorter path exists -- confirm this is intended.
        visited_nodes.add(u)
        edges = hypergraph.edges_iter_dir(u, dir_code=edge_dir)
        # edges already copied as part of a parallel group during this call
        skip_edges = set()
        for edge in edges:
            if edge in skip_edges:
                continue
            endpoints = hypergraph.endpoints(edge)
            new_endpoints = set(endpoints) - set([u])
            # copy every other endpoint of the edge into the r-ball
            for v in new_endpoints:
                if not rball.has_node(v):
                    rball.add_node(v, attr_dict=copy.deepcopy(hypergraph.node[v]))
            
            # NOTE(review): raises StopIteration when u is the edge's only
            # endpoint (new_endpoints is then empty), e.g. for a self-loop.
            first_new_endpoint = next(iter(new_endpoints))
            # TODO: this condition may be tricky if the graph has hyperedges
            if not rball.has_edge(u, first_new_endpoint, edge_dir):
                parallel_edges = hypergraph.edges_iter_dir(u, first_new_endpoint, dir_code=edge_dir)
                # add all parallel edges in the same direction to the r-ball
                for parallel_edge in parallel_edges:
                    skip_edges.add(parallel_edge)
                    p_edge_attr = hypergraph.edge(parallel_edge)
                    direction = p_edge_attr["direction"]
                    # TODO: not safe if we have hyperedges
                    # (the endpoints of the current edge are reused for
                    # every parallel edge; labels are joined with commas)
                    rball.add_edge(endpoints, direction=copy.deepcopy(direction), label=u",".join(copy.deepcopy(p_edge_attr["labels"])))
            
            # descend only while the depth limit has not been reached
            if i < r:
                for v in new_endpoints:
                    if v not in visited_nodes:
                        recurse(v, i + 1)
    
    # rball is bound here, before recurse is first called, so the closure
    # above can see it.
    rball = Hypergraph()
    if center_default_color:
        # the center node's default color is 0 ("owl:Thing")
        rball.add_node(center, attr_dict={"labels": ["0"]})
    else:
        rball.add_node(center, attr_dict=copy.deepcopy(hypergraph.node[center]))
    if r > 0:
        recurse(center, 1)
    
    # rebuild the r-ball's derived bookkeeping after all insertions
    rball.init_parallel_edges_groups()
    rball.init_nodes_with_n_neighbors()
    
    return rball
Ejemplo n.º 23
0
 def testWeisfeilerLehman(self):
     """Iterate Weisfeiler-Lehman until stable and check the final state.

     The example graph is initialised with weisfeiler_lehman.init and then
     refined with weisfeiler_lehman.iterate (both in test mode) until
     weisfeiler_lehman.is_stable reports stability.  The resulting state
     dictionary must equal the fixture below.
     """
     expected_state = {
         "labels": {
             "0": "wl_0.0",
             "1": "wl_0.1",
             "a": "wl_0.2",
             "b": "wl_0.3",
             "wl_0.0;in(wl_0.3)": "wl_1.0",
             "wl_0.0;any(wl_0.2),in(wl_0.2)": "wl_1.1",
             "wl_0.1;any(wl_0.2),out(wl_0.2,wl_0.3)": "wl_1.2",
             "wl_0.1;any(wl_0.2),out(wl_0.2)": "wl_1.3",
             "wl_0.2;in(wl_0.1),out(wl_0.0)": "wl_1.4",
             "wl_0.2;any(wl_0.0,wl_0.1)": "wl_1.5",
             "wl_0.3;in(wl_0.1),out(wl_0.0)": "wl_1.6",
             "wl_1.0;in(wl_1.6)": "wl_2.0",
             "wl_1.1;any(wl_1.5),in(wl_1.4)": "wl_2.1",
             "wl_1.2;any(wl_1.5),out(wl_1.4,wl_1.6)": "wl_2.2",
             "wl_1.3;any(wl_1.5),out(wl_1.4)": "wl_2.3",
             "wl_1.4;in(wl_1.2),out(wl_1.1)": "wl_2.4",
             "wl_1.4;in(wl_1.3),out(wl_1.1)": "wl_2.5",
             "wl_1.5;any(wl_1.1,wl_1.2)": "wl_2.6",
             "wl_1.5;any(wl_1.1,wl_1.3)": "wl_2.7",
             "wl_1.6;in(wl_1.2),out(wl_1.0)": "wl_2.8",
             "wl_2.0;in(wl_2.8)": "wl_3.0",
             "wl_2.1;any(wl_2.7),in(wl_2.4)": "wl_3.1",
             "wl_2.1;any(wl_2.6),in(wl_2.5)": "wl_3.2",
             "wl_2.2;any(wl_2.6),out(wl_2.4,wl_2.8)": "wl_3.3",
             "wl_2.3;any(wl_2.7),out(wl_2.5)": "wl_3.4",
             "wl_2.4;in(wl_2.2),out(wl_2.1)": "wl_3.5",
             "wl_2.5;in(wl_2.3),out(wl_2.1)": "wl_3.6",
             "wl_2.6;any(wl_2.1,wl_2.2)": "wl_3.7",
             "wl_2.7;any(wl_2.1,wl_2.3)": "wl_3.8",
             "wl_2.8;in(wl_2.2),out(wl_2.0)": "wl_3.9"
         },
         "next_labels": {
             0: 4,
             1: 7,
             2: 9,
             3: 10
         }
     }
     current, state = weisfeiler_lehman.init(
         Hypergraph(example_graphs.gt_dummy_wl), test_mode=True)

     # Refine until an iteration leaves the labelling stable; the iteration
     # counter starts at 1 and the stable candidate is not adopted.
     step = 0
     stable = False
     while not stable:
         step += 1
         candidate, state = weisfeiler_lehman.iterate(
             current, state, step, test_mode=True)
         stable = weisfeiler_lehman.is_stable(current, candidate, step)
         if not stable:
             current = candidate

     self.assertEqual(
         expected_state, state,
         "The multi-sets of labels computed by Weisfeiler-Lehman are not correct."
     )
 def testSimilarNodesMining(self):
     """Check the sketch-based node similarity matrix against the exact one.

     The reference is the exact Jaccard similarity matrix, thresholded at
     0.8 and converted to float32.  Because the sketch matrix is
     probabilistic, the comparison can fail sporadically.
     """
     radii = dict(r_in=3, r_out=2, r_all=0)
     source = Hypergraph(example_graphs.snm_dummy_graph)
     rballs, _ = similar_nodes_mining.extract_rballs_database(source, **radii)
     node_total = source.number_of_nodes()
     characteristic = CharacteristicMatrix(rballs, node_total, wl_iterations=0)
     jaccard = characteristic.compute_jaccard_similarity_matrix()
     reference = np.array(jaccard >= 0.8, dtype=np.float32)
     sketch = SketchMatrix(25, 265, characteristic)
     mined = similar_nodes_mining.get_node_similarity_matrix(sketch)
     agrees = (reference == mined).all()
     self.assertTrue(
         agrees,
         "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
     )
Ejemplo n.º 25
0
    def testRBallHyper(self):
        """Check radius-2 r-ball extraction around "n_10" for each edge_dir.

        The r-balls extracted with edge_dir -1, 1 and 0 are compared (up to
        isomorphism) with the stored reference r-balls.
        """
        source = Hypergraph(example_graphs.gt_dummy_graph)
        center, radius = "n_10", 2

        ball_in = algorithms.r_ball_hyper(source, center, radius, -1)
        ball_out = algorithms.r_ball_hyper(source, center, radius, 1)
        ball_all = algorithms.r_ball_hyper(source, center, radius, 0)

        reference_all = Hypergraph(example_graphs.gt_dummy_rball_10_r2_all)
        reference_out = Hypergraph(example_graphs.gt_dummy_rball_10_r2_out)
        reference_in = Hypergraph(example_graphs.gt_dummy_rball_10_r2_in)

        # Every isomorphism check is evaluated before the first assertion.
        outcomes = [
            (algorithms.isomorphic(reference_all, ball_all),
             "Problem extracting r-ball with edge_dir=0."),
            (algorithms.isomorphic(reference_out, ball_out),
             "Problem extracting r-ball with edge_dir=1."),
            (algorithms.isomorphic(reference_in, ball_in),
             "Problem extracting r-ball with edge_dir=-1."),
        ]
        for matched, failure_message in outcomes:
            self.assertTrue(matched, failure_message)
Ejemplo n.º 26
0
 def testDropEdgesByProbability(self):
     """Check that drop_edges_by_probability keeps roughly (1 - p) of the edges.

     For each probability p in 0.0 .. 1.0 (step 0.1) the function is applied
     both to the Hypergraph and to the raw example graph.  The fraction of
     remaining edges, measured against the hypergraph's original edge
     count, must be within 0.2 of the expected proportion.
     """
     dummy_hypergraph = Hypergraph(example_graphs.gt_dummy_graph)
     edges_count = dummy_hypergraph.number_of_edges()
     msg = "The proportion of edges remaining after being dropping deviate too much from the expected."
     for p in [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]:
         new_hypergraph = algorithms.drop_edges_by_probability(
             dummy_hypergraph, p)
         new_graph = algorithms.drop_edges_by_probability(
             example_graphs.gt_dummy_graph, p)
         edges_prop_exp = ((1. - p) * edges_count) / float(edges_count)
         edges_prop_hyper = float(
             new_hypergraph.number_of_edges()) / float(edges_count)
         edges_prop = float(
             new_graph.number_of_edges()) / float(edges_count)
         # assertAlmostEquals is a deprecated alias (removed in Python 3.12);
         # assertAlmostEqual is the supported spelling and accepts `delta`
         # since Python 2.7.
         self.assertAlmostEqual(edges_prop_exp,
                                edges_prop_hyper,
                                delta=0.2,
                                msg=msg)
         self.assertAlmostEqual(edges_prop_exp,
                                edges_prop,
                                delta=0.2,
                                msg=msg)
 def testFeatureTypes(self):
     """Check that the extracted features match the stored reference features.

     Every raw feature returned by arnborg_proskurowski.get_reduced_features
     is expanded with feature_extraction.process_raw_feature.  The i-th
     extracted feature must be isomorphic to the i-th reference feature.
     """
     hgraph = Hypergraph(example_graphs.snm_dummy_graph_2)
     extracted = []
     for raw in arnborg_proskurowski.get_reduced_features(hgraph):
         extracted += list(
             feature_extraction.process_raw_feature(raw, hgraph))

     # NOTE(review): only as many reference features as were extracted are
     # compared, so extracting too few features is not detected here.
     verdicts = [
         algorithms.isomorphic(
             found, example_graphs.snm_dummy_graph_features_2[position])
         for position, found in enumerate(extracted)
     ]
     self.assertTrue(all(verdicts), "Wrong features extracted.")
Ejemplo n.º 28
0
def read_chemical_compounts(in_file, process_compound_function=None):
    '''Lazily read a dataset of chemical compound graphs (e.g. Mutagenicity).

    The file is consumed in groups of three lines per compound:
      1. a header line starting with "#" followed by space-separated
         integers (the compound properties),
      2. the space-separated node labels,
      3. the edges as a flat, space-separated sequence of
         (source, target, label) triples.
    A line starting with "$" in header position ends the dataset. The last
    space-separated token of the node and edge lines is discarded.

    :param in_file: Input text file (read as UTF-8).
    :param process_compound_function: Optional callable invoked with every
    record right before the record is yielded.
    :return: a generator of records (p[0], [h], p[1]) where p is the list of
    integer properties from the header line and h is the Hypergraph built
    from the compound graph. p[2] and p[3] must equal the number of nodes
    and edges of the compound, respectively.
    '''
    current_graph = None
    current_properties = None
    with codecs.open(in_file, "r", "utf8") as fp:
        for line_number, line in enumerate(fp):
            position = line_number % 3
            if position == 0:
                if line.startswith("$"): # EOF
                    break
                assert line.startswith("#"), "Header line must start with '#'."
                # A real list is required because the properties are indexed
                # below (map() is lazy on Python 3).
                current_properties = [int(x) for x in line.split(" ")[1:]]
                current_graph = nx.Graph()
            elif position == 1:
                nodes = line.split(" ")[:-1]
                assert len(nodes) == current_properties[2]
                for node_index, node_label in enumerate(nodes):
                    current_graph.add_node(node_index + 1, labels=[node_label])
            else:
                tokens = line.split(" ")[:-1]
                # Group the flat token list into complete triples; an
                # incomplete trailing group is ignored.
                edges = [tokens[k : k + 3]
                         for k in range(0, 3 * (len(tokens) // 3), 3)]
                assert len(edges) == current_properties[3]
                for edge in edges:
                    current_graph.add_edge(int(edge[0]), int(edge[1]), label=edge[2])
                
                # Records are not accumulated here, so memory stays bounded
                # while the caller streams through the dataset.
                ch_db_record = (current_properties[0], [Hypergraph(current_graph)], current_properties[1])
                if process_compound_function:
                    process_compound_function(ch_db_record)
                yield ch_db_record
 def testFeatureExtraction(self):
     '''Check feature extraction with one Weisfeiler-Lehman iteration.

     Features are extracted from the 1-balls around "n_2" of snm_dummy_graph
     (edge_dir=1 first, then edge_dir=-1). The Weisfeiler-Lehman state is
     threaded from the first extraction into the second, and both the final
     state and the extracted features are compared with the expectations.
     '''
     wl_state_exp = {
         "labels": {
             "g": "wl_0.0",
             "n": "wl_0.1",
             "r": "wl_0.2",
             "b": "wl_0.3",
             "wl_0.0;in(wl_0.2),out(wl_0.2)": "wl_1.0",
             "wl_0.1;in(wl_0.2),out(wl_0.2)": "wl_1.1",
             "wl_0.2;in(wl_0.1)": "wl_1.2",
             "wl_0.2;out(wl_0.0,wl_0.1)": "wl_1.3",
             "wl_0.2;in(wl_0.0)": "wl_1.4",
             "wl_0.1;in(wl_0.3),out(wl_0.2)": "wl_1.5",
             "wl_0.3;out(wl_0.1)": "wl_1.6",
             "wl_0.2;in(wl_0.1,wl_0.1)": "wl_1.7"
         },
         "next_labels": {
             0: 4,
             1: 8
         }
     }
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database = [
         r_ball_hyper(dummy_hypergraph, "n_2", 1, edge_dir=1),
         r_ball_hyper(dummy_hypergraph, "n_2", 1, edge_dir=-1)
     ]
     features = []
     wl_state = None
     for rball in rballs_database:
         new_features, wl_state = feature_extraction.extract_features(
             rball, wl_iterations=1, wl_state=wl_state)
         features += new_features
     self.assertEqual(
         wl_state_exp, wl_state,
         "The wrong wl_state was computed by Weisfeiler-Lehman.")
     features_exp = example_graphs.snm_dummy_graph_features
     # Without this check the comparison below passes vacuously when fewer
     # features than expected (or none at all) are extracted.
     self.assertEqual(len(features_exp), len(features),
                      "Wrong number of features extracted.")
     isomorphic = all([
         algorithms.isomorphic(feature, feature_exp)
         for feature, feature_exp in zip(features, features_exp)
     ])
     self.assertTrue(isomorphic, "Wrong features extracted.")
Ejemplo n.º 30
0
    def get_shingle_fingerprints():
        '''Compute the set of shingle fingerprints of all query graphs.

        Reads query_graph_list, wl_iterations and wl_labels_list from the
        enclosing scope. Query graphs that are not Hypergraph instances are
        wrapped in a Hypergraph first. The Weisfeiler-Lehman labels returned
        by one feature extraction are passed on to the next one.

        :return: the tuple (f, l) where f is the set of fingerprints and l is
        the value returned by the last feature extraction (wl_labels_list
        itself when there are no query graphs).
        '''
        def iter_fingerprints(features_per_graph):
            # Lazily walk graph -> feature -> shingles -> fingerprints.
            for graph_features in features_per_graph:
                for feature in graph_features:
                    feature_shingles = shingle_extraction.extract_shingles(feature)
                    for fp in fingerprint.get_fingerprints(feature_shingles):
                        yield fp

        wl_labels = wl_labels_list
        features_per_graph = []
        for query_graph in query_graph_list:
            if type(query_graph) is not Hypergraph:
                query_graph = Hypergraph(query_graph)
            graph_features, wl_labels = feature_extraction.extract_features(
                query_graph, wl_iterations, wl_labels)
            features_per_graph.append(graph_features)

        fingerprints = set(iter_fingerprints(features_per_graph))
        return fingerprints, wl_labels
Ejemplo n.º 31
0
 def testRBallHyper_CenterDefaultColor(self):
     '''Check the 2-balls around "n_10" when the center gets the default color.

     The balls are extracted for edge_dir -1, 1 and 0 with
     center_default_color=True and compared with the example r-balls whose
     center labels are replaced by ["0"].
     '''
     graph = Hypergraph(example_graphs.gt_dummy_graph)
     
     # Extract every ball first, in the order in, out, all.
     extracted = {}
     for edge_dir in (-1, 1, 0):
         extracted[edge_dir] = algorithms.r_ball_hyper(
             graph, "n_10", 2, edge_dir, center_default_color=True)
     
     # (edge_dir, expected r-ball, failure message), checked in this order.
     cases = [
         (0, Hypergraph(example_graphs.gt_dummy_rball_10_r2_all),
          "Problem extracting r-ball with edge_dir=0."),
         (1, Hypergraph(example_graphs.gt_dummy_rball_10_r2_out),
          "Problem extracting r-ball with edge_dir=1."),
         (-1, Hypergraph(example_graphs.gt_dummy_rball_10_r2_in),
          "Problem extracting r-ball with edge_dir=-1."),
     ]
     for case in cases:
         case[1].node["n_10"]["labels"] = ["0"]
     
     # Run all comparisons before asserting anything.
     outcomes = [
         (algorithms.isomorphic(expected, extracted[edge_dir]), message)
         for edge_dir, expected, message in cases
     ]
     for is_isomorphic, message in outcomes:
         self.assertTrue(is_isomorphic, message)
Ejemplo n.º 32
0
 def testHypergraph_ReadWrite(self):
     '''Check that a hypergraph saved to a file is read back as an equal hypergraph.'''
     path = "test_files/dummy_hypergraph.tmp"
     saved = Hypergraph(example_graphs.gt_dummy_graph)
     saved.save_to_file(path)
     loaded = Hypergraph.load_from_file(path)
     self.assertEqual(
         saved, loaded,
         "The read hypergraph is different from the saved one.")
Ejemplo n.º 33
0
def r_ball_hyper(hypergraph,
                 center,
                 r,
                 edge_dir=0,
                 center_default_color=False):
    '''The same as r_ball but for Hypergraph: extract the neighborhood of
    radius r around a center node as a new Hypergraph.
    :param hypergraph: The source graph; must be exactly of type Hypergraph.
    :param center: The center node of the r-ball.
    :param r: The radius. For r <= 0 the r-ball contains only the center node.
    :param edge_dir: Direction code passed to Hypergraph.edges_iter_dir
    (elsewhere in this code base: 0 - all edges, 1 - only outgoing,
    -1 - only incoming).
    :param center_default_color: If True, the center node gets the labels
    ["0"] instead of a copy of its attributes in the source graph.
    :return: A new Hypergraph containing deep copies of the reached nodes and
    of the edges between them.
    '''
    assert type(hypergraph) is Hypergraph

    # Nodes that were already expanded; shared by all recursive calls.
    visited_nodes = set()

    def recurse(u, i):
        # Expand node u, which is reached at depth i (the center has depth 1).
        # NOTE(review): the traversal is depth-first with a shared visited
        # set, so a node first expanded at a large depth is not expanded again
        # when it is reached later at a smaller depth - confirm that no node
        # within radius r can be missed because of this.
        visited_nodes.add(u)
        edges = hypergraph.edges_iter_dir(u, dir_code=edge_dir)
        # Parallel edges that were already copied together with an earlier one.
        skip_edges = set()
        for edge in edges:
            if edge in skip_edges:
                continue
            endpoints = hypergraph.endpoints(edge)
            new_endpoints = set(endpoints) - set([u])
            for v in new_endpoints:
                if not rball.has_node(v):
                    rball.add_node(v,
                                   attr_dict=copy.deepcopy(hypergraph.node[v]))

            # NOTE(review): new_endpoints is empty when all endpoints of the
            # edge are u itself (a self-loop); next() then raises StopIteration.
            first_new_endpoint = next(iter(new_endpoints))
            # TODO: this condition may be tricky if the graph has hyperedges
            if not rball.has_edge(u, first_new_endpoint, edge_dir):
                parallel_edges = hypergraph.edges_iter_dir(u,
                                                           first_new_endpoint,
                                                           dir_code=edge_dir)
                # add all parallel edges in the same direction to the r-ball
                for parallel_edge in parallel_edges:
                    skip_edges.add(parallel_edge)
                    p_edge_attr = hypergraph.edge(parallel_edge)
                    direction = p_edge_attr["direction"]
                    # TODO: not safe if we have hyperedges
                    # The labels of the edge are joined into one comma-separated label.
                    rball.add_edge(endpoints,
                                   direction=copy.deepcopy(direction),
                                   label=u",".join(
                                       copy.deepcopy(p_edge_attr["labels"])))

            # Continue from the new endpoints only while the radius is not exhausted.
            if i < r:
                for v in new_endpoints:
                    if v not in visited_nodes:
                        recurse(v, i + 1)

    rball = Hypergraph()
    if center_default_color:
        # the center node's default color is 0 ("owl:Thing")
        rball.add_node(center, attr_dict={"labels": ["0"]})
    else:
        rball.add_node(center,
                       attr_dict=copy.deepcopy(hypergraph.node[center]))
    if r > 0:
        recurse(center, 1)

    # Initialize the bookkeeping structures of the new hypergraph after all
    # nodes and edges were added.
    rball.init_parallel_edges_groups()
    rball.init_nodes_with_n_neighbors()

    return rball
Ejemplo n.º 34
0
    def chem_database_generator(full_graph, uri_node_map, type_color_map, compounds_and_targets):
        literal_colors = set()
        for rdf_type in type_color_map:
            # TODO: this condition is unsafe because it may remove not only literal colors
            if rdf_type.startswith(u"http://www.w3.org/2001/XMLSchema#"):
                literal_colors.add(type_color_map[rdf_type])
        
        bool_colors = filter(lambda x: x.startswith(u"http://www.w3.org/2001/XMLSchema#boolean"), type_color_map)
        bool_colors = set(map(lambda x: type_color_map[x], bool_colors))
        literal_colors -= bool_colors
         
        for node in full_graph.nodes():
            node_labels_set = set(full_graph.node[node]["labels"])
            # remove all literals (except booleans)
            if literal_colors & node_labels_set:
                full_graph.remove_node(node)
        
        # remove the color of named individual type from all nodes where it occurs
        named_indiv_uri = u"http://www.w3.org/2002/07/owl#NamedIndividual"
        if named_indiv_uri in type_color_map:
            named_indiv_color = type_color_map[named_indiv_uri]
            for node in full_graph.nodes_iter():
                if named_indiv_color in full_graph.node[node]["labels"]:
                    full_graph.node[node]["labels"].remove(named_indiv_color)
    
        full_hypergraph = Hypergraph(full_graph)
        
#         ################
#         # INFO: use this to remove the isMutagenic property when predicting mutagenicity
#         is_mutag_color = type_color_map[u"http://dl-learner.org/carcinogenesis#isMutagenic"]
#         edges_to_remove = []
#         for edge in full_hypergraph.edges_iter():
#             if is_mutag_color in full_hypergraph.edge(edge)['labels']:
#                 edges_to_remove.append(edge)
#         for edge in edges_to_remove:
#             full_hypergraph.safe_remove_edge(edge)
#         ################
        
        if not compounds_and_targets:
            compounds_and_targets = read_compounds_and_targets()
        
        def remove_other_neighbors_of_bool_literals(hypergraph, center_node):
            center_neighbors = hypergraph.neighbors(center_node)
            bool_literals = filter(lambda n: set(hypergraph.node[n]['labels']) & bool_colors, center_neighbors)
            for bool_literal in bool_literals:
                bool_literal_neigbors = set(hypergraph.neighbors(bool_literal))
                # exclude the center node from the removable nodes
                bool_literal_neigbors.remove(center_node)
                for neigh in bool_literal_neigbors:
                    hypergraph.safe_remove_node(neigh)
        
        for comp_id, target_label in compounds_and_targets:
            node_id = u"n_{0}".format(uri_node_map[uri_prefix + comp_id])
            comp_neighborhood_hypergraph = algorithms.r_ball_hyper(full_hypergraph, node_id, 2, 0)
            remove_other_neighbors_of_bool_literals(comp_neighborhood_hypergraph, node_id)
            ch_db_record = (comp_id, [comp_neighborhood_hypergraph], target_label)
            if process_compound_function:
                process_compound_function(ch_db_record)
#             ############
#             def get_key(value, dictionary):
#                 for key in dictionary:
#                     if dictionary[key] == value:
#                         return key
#                 return None
#             g = ch_db_record[1][0].copy()
#             for n in g.node:
#                 n_new_labels = []
#                 for n_color in g.node[n]['labels']:
#                     n_rdf_type = get_key(n_color, type_color_map)
#                     n_rdf_type = n_rdf_type[n_rdf_type.find(u"#") + 1:]
#                     n_new_labels.append(n_rdf_type)
#                 g.node[n]['labels'] = n_new_labels
#             g.visualize()
#             ############
            yield ch_db_record
Ejemplo n.º 35
0
 def testHypergraph_Copy(self):
     '''Check that a copy of a hypergraph compares equal to the original.'''
     original = Hypergraph(example_graphs.gt_dummy_graph)
     duplicate = original.copy()
     self.assertEqual(original, duplicate, "The copy was not correct.")
Ejemplo n.º 36
0
def extract_rballs_from_rdf_server(entries, output_dir, r, edge_dir, sparql_endpoint="http://localhost:3030/ds/query",
                                   entries_count_expected=-1, sort_rdf_nodes_before_processing=True):
    '''Extract r-balls around the given entry nodes from the graph on the server using SPARQL queries.
    Every r-ball is saved to its own file "r_ball_<index>" as the record
    (entry URI, [r-ball as Hypergraph], target labels), where the target labels
    are the original labels of the center node.
    :param entries: the entry nodes (resources, URI/IRIs) which will serve as center nodes of the r-balls
    :param output_dir: the directory for writing the output files. The file name is appended
    directly to this string, so it has to end with a path separator.
    :param r: radius of the r-balls
    :param edge_dir: the direction of edges to be considered (0 - all edges, 1 - only outgoing, -1 - only incoming)
    :param sparql_endpoint: URL of the SPARQL end-point. Default is http://localhost:3030/ds/query (for Apache Jena Fuseki)
    :param entries_count_expected: Expected number of entries to process. Used only to estimate
    the remaining time in the progress output (-1 means unknown).
    :param sort_rdf_nodes_before_processing: Used to yield the same colors in multiple runs. 
    :return: the tuple (n, t) where n maps a number of nodes to the count of r-balls of that size
    and t maps a type URI to the count of r-balls whose center node had that type as a target label.
    '''
    # The colors and the next free color id are carried over from one r-ball
    # to the next one through base_colors/next_color_id below.
    colors = None
    next_color_id = None
    
    nodes_count_distribution = {}
    type_distribution = {}
    def update_stats(nodes_count, target_labels, colors):
        # Update both distributions with the statistics of one r-ball.
        def get_target_uri_map():
            # Invert the colors mapping (URI -> color) for the colors which
            # occur in target_labels; stop once every target label has a URI.
            target_uri_map = {}
            for uri in colors:
                if colors[uri] in target_labels:
                    target_uri_map[colors[uri]] = uri
                    if len(target_uri_map) == len(target_labels):
                        break
            return target_uri_map
        
        if nodes_count not in nodes_count_distribution:
            nodes_count_distribution[nodes_count] = 0
        nodes_count_distribution[nodes_count] += 1
        
        target_uri_map = get_target_uri_map()
        for target in target_uri_map:
            type_uri = target_uri_map[target]
            if type_uri not in type_distribution:
                type_distribution[type_uri] = 0
            type_distribution[type_uri] += 1
    
    start_time = time.time()
    
    for i, entry_uri in enumerate(entries):
#         # TODO: specific case of 2-in-balls
#         query_status, rdf_r_ball = rdf.quary_2_in_ball(entry_uri, sparql_endpoint)
        query_status, rdf_r_ball = rdf.quary_r_ball(entry_uri, r, edge_dir, sparql_endpoint, ignore_type_paths=True, include_types=True)
        # A truthy status aborts the extraction (presumably it signals a
        # failed query - confirm against rdf.quary_r_ball).
        assert not query_status
        r_ball, uri_nodes_map, colors, next_color_id = rdf.convert_rdf_graph_to_nx_graph(rdf_r_ball, test_mode=sort_rdf_nodes_before_processing,
                                                                                         return_colors=True, base_colors=colors, next_color_id=next_color_id)
        if entry_uri not in uri_nodes_map:
            # in case the r-ball is empty
            node_id = 0
            r_ball.add_node(node_id, labels=["0"])
            uri_nodes_map[entry_uri] = node_id
        
        center_node = uri_nodes_map[entry_uri]
        target_labels = list(r_ball.node[center_node]["labels"])
        # Make the center node of color 0 (owl:Thing)
        # The original colors of the center node serve as target labels of the r-ball
        r_ball.node[center_node]["labels"] = ["0"]
        hyper_r_ball = Hypergraph(r_ball)
        nodes_count = r_ball.number_of_nodes()
        if i % 10 == 0: # print every 10 records
            elapsed_time = time.time() - start_time
            if entries_count_expected == -1 or i == 0:
                time_est = "Elapsed time: {0:.2f}s".format(elapsed_time)
            else:
                # Extrapolate from the average time per processed entry.
                time_left = (elapsed_time / i) * (entries_count_expected - i) 
                time_est = "Time left: {0:.2f}s".format(time_left)
            print i, time_est, nodes_count, entry_uri, target_labels
        update_stats(nodes_count, target_labels, colors)
        graph_database_record = (entry_uri, [hyper_r_ball], target_labels)
        inout.save_to_file(graph_database_record, output_dir + "r_ball_{0}".format(i))
    
    return nodes_count_distribution, type_distribution
Ejemplo n.º 37
0
 def testHypergraph_subgraph_with_labels(self):
     '''Check the subgraph extracted for the nodes n_1, n_6, n_9 and n_10.'''
     graph = Hypergraph(example_graphs.gt_dummy_graph)
     selected_nodes = set(["n_1", "n_6", "n_9", "n_10"])
     extracted = graph.subgraph_with_labels(selected_nodes)
     self.assertTrue(
         algorithms.isomorphic(example_graphs.gt_dummy_subgraph, extracted),
         "Incorrect subgraph extraction from hypergraph.")
 def testCharacteristicMatrix(self):
     '''Check the characteristic matrix built from the r-balls of snm_dummy_graph.

     The r-balls are extracted with r_in=3, r_out=2 and r_all=0; the matrix is
     built without Weisfeiler-Lehman iterations and its sparse matrix is
     compared with self.raw_ch_matrix_exp.
     '''
     graph = Hypergraph(example_graphs.snm_dummy_graph)
     extraction_result = similar_nodes_mining.extract_rballs_database(
         graph, r_in=3, r_out=2, r_all=0)
     # Only the r-balls database (first element of the pair) is needed.
     rballs_database, _ = extraction_result
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      graph.number_of_nodes(),
                                      wl_iterations=0)
     self.assertEqual(self.raw_ch_matrix_exp,
                      ch_matrix.sparse_matrix,
                      "The computed characteristic matrix is wrong.")
Ejemplo n.º 39
0
def run_algorithm(graph, return_features=False, compute_string=True):
    '''Performs the algorithm proposed by Arnborg & Proskurowski on a graph with tree-width at most 3.
    The reduction rules are applied repeatedly on a Hypergraph built from the
    input (or on a copy of the input if it already is a Hypergraph) until no
    rule is applicable anymore. After any successful rule the loop starts over
    with rule 0.
    :param graph: A NetworkX graph or a Hypergraph.
    :param return_features: (default False) If true, returns the features, which
    were reduced by the algorithm.
    :param compute_string: (default True) If True returns the canonical string
    representation of the graph. False means to perform the reduction rules
    without computing the canonical string (rule 3 is skipped in this case).
    :return A tuple of the form (tree_width, canonical_string[, reduced_features]).
    The tree-width is the highest rule level which was applied (1, 2 or 3; 0 if
    none). If edges remain but no rule is applicable, the tree-width is -1 and
    the string is "Tree-width > 3". For a graph without nodes the result is
    (0, ""[, []]).
    '''
    def is_done(hypergraph):
        # The reduction is finished when no edges are left.
        if hypergraph.number_of_edges() == 0:
            return True
        else:
            return False
    
    def collect_labels(hypergraph):
        # Build the canonical string: the first labels of all remaining
        # nodes, sorted and joined by commas.
        labels = []
        
        for node in hypergraph.nodes_iter():
            labels.append(hypergraph.node[node]["labels"][0])
        
        labels.sort()
        
        return u",".join(labels)
    
    def rule_0(hypergraph, compute_string):
        # Normalization: remove self-loops, merge multiple node labels
        # (rule 0.1) and merge parallel edges (rule 0.2).
        # Returns True if anything was changed.
        modified = False
        
        # (originally 1.3) - remove self-loops
        self_loops = list(hypergraph.self_loops)
        if len(self_loops) > 0:
            modified = True
        if compute_string:
            # The first label of the self-loop is moved to its node.
            for self_loop in self_loops:
                node = hypergraph.endpoints(self_loop)[0]
                hypergraph.add_node_label(node, hypergraph.edge(self_loop)["labels"][0])
                hypergraph.remove_edge(self_loop)
        else:
            hypergraph.remove_edges_from(self_loops, unsafe=True)
        
        # rule 0.1
        if compute_string:
            nodes_with_more_labels = list(hypergraph.nodes_with_more_labels)
            if len(nodes_with_more_labels) > 0:
                modified = True
            
            # The sorted labels of the node are merged into a single label.
            for node in nodes_with_more_labels:
                labels = hypergraph.node[node]["labels"]
                labels.sort()
                new_label = u"(0.1;{0})".format(u",".join(labels))
                hypergraph.set_node_labels(node, [new_label])
            
            hypergraph.reset_nodes_with_more_labels()
        
        # rule 0.2
        parallel_edges_groups_keys = list(hypergraph.parallel_edges_groups.keys())
        
        if len(parallel_edges_groups_keys) > 0:
            modified = True
        
        for key in parallel_edges_groups_keys:
            edges_group = list(hypergraph.parallel_edges_groups[key])
            endpoints = hypergraph.endpoints(edges_group[0])
            if compute_string:
                # One candidate label per permutation of the endpoints; the
                # lexicographically minimal one becomes the label of the
                # merged edge and all permutations yielding it become its
                # direction.
                perms = permutations(endpoints)
                possible_labels = []
                for perm in perms:
                    possible_label = {}
                    possible_label["perm"] = perm
                    possible_label["label"] = []
                    for edge in edges_group:
                        possible_label["label"].append(Hypergraph.edge_to_string(hypergraph, edge, perm))
                    possible_label["label"].sort()
                    possible_label["label"] = u"(0.2;{0})".format(u",".join(possible_label["label"]))
                    possible_labels.append(possible_label)
                possible_labels = sorted(possible_labels, key=lambda element: element["label"])
                minimal_label = possible_labels[0]["label"]
                minimal_perm_indices = filter(lambda i: possible_labels[i]["label"] == minimal_label, range(len(possible_labels)))
                direction = set([possible_labels[i]["perm"] for i in minimal_perm_indices])
                hypergraph.remove_edges_from(edges_group, unsafe=True)
                hypergraph.add_edge(endpoints, direction, minimal_label)
            else:
                # Without a canonical string the group is replaced by a
                # single edge with no direction and an empty label.
                hypergraph.remove_edges_from(edges_group, unsafe=True)
                hypergraph.add_edge(endpoints, set(), "")
        
        hypergraph.reset_parallel_edges_groups()
        
        return modified
        
    def rule_1(hypergraph, return_features=False, compute_string=True):
        # Reduce the rule 1 ("pendant") features.
        # Returns (modified, features); features is None unless return_features.
        modified = False
        pendant_features = ReducibleFeature.extract_rule_1_features(hypergraph)
        if return_features:
            # Materialize the features so that they can be returned after the loop.
            pendant_features = list(pendant_features)
        
        affected_nodes = set()
        
        for feature in pendant_features:
            if not modified:
                modified = True
            feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
        
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        
        return modified, pendant_features if return_features else None
    
    def rule_2(hypergraph, return_features=False, compute_string=True):
        # Reduce the rule 2 ("series") features.
        # Returns (modified, features); features is None unless return_features.
        modified = False
        series_features = ReducibleFeature.extract_rule_2_features(hypergraph)
        if return_features:
            series_features = list(series_features)
        
        affected_nodes = set()
        new_edges = set()
        
        for feature in series_features:
            if not modified:
                modified = True
            _new_edges = feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
            new_edges |= _new_edges
        
        # The edges created by the reductions may be parallel to other edges.
        hypergraph.update_parallel_edges_groups(new_edges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        
        return modified, series_features if return_features else None
    
    def rule_3(hypergraph):
        # Merge every group of parallel hyperedges into a single edge labeled
        # "(3;<minimal label>)". Returns True if there was any such group.
        modified = False
        
        parallel_hedges_groups_keys = list(hypergraph.parallel_hedges_groups.keys())
        
        if len(parallel_hedges_groups_keys) > 0:
            modified = True
        
        for key in parallel_hedges_groups_keys:
            # NOTE(review): unlike rule 0.2 the group is not copied into a
            # list here, although it is indexed below and passed to
            # remove_edges_from - confirm that the group is an indexable
            # sequence which is safe to use this way.
            hedges_group = hypergraph.parallel_hedges_groups[key]
            endpoints = hypergraph.endpoints(hedges_group[0])
            perms = permutations(endpoints)
            possible_labels = []
            for perm in perms:
                possible_label = {}
                possible_label["perm"] = perm
                possible_label["label"] = []
                for hedge in hedges_group:
                    possible_label["label"].append(Hypergraph.hedge_to_string(hypergraph, hedge, perm))
                possible_label["label"].sort()
                possible_label["label"] = u",".join(possible_label["label"])
                possible_labels.append(possible_label)
            possible_labels = sorted(possible_labels, key=lambda element: element["label"])
            minimal_label = possible_labels[0]["label"]
            minimal_perm_indices = filter(lambda i: possible_labels[i]["label"] == minimal_label, range(len(possible_labels)))
            direction = set([possible_labels[i]["perm"] for i in minimal_perm_indices])
            hypergraph.remove_edges_from(hedges_group, unsafe=True)
            hypergraph.add_edge(endpoints, direction, u"(3;{0})".format(minimal_label))
        
        hypergraph.reset_parallel_hedges_groups()
        
        return modified
    
    def rules_4_5_6_7(hypergraph, return_features=False, compute_string=True):
        # Reduce the degree 3 features (rules 4 to 7).
        # Returns (modified, features); features is None unless return_features.
        modified = False
        degree_3_features = ReducibleFeature.extract_degree_3_features(hypergraph)
        if return_features:
            degree_3_features = list(degree_3_features)
        
        affected_nodes = set()
        new_edges = set()
        
        for feature in degree_3_features:
            if not modified:
                modified = True
            _new_edges = feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
            new_edges |= _new_edges
        
        # Edges whose id starts with "he_" are tracked in the parallel
        # hyperedges groups, all the others in the parallel edges groups.
        new_hedges = set(filter(lambda edge_id: edge_id.startswith(u"he_"), new_edges))
        
        hypergraph.update_parallel_edges_groups(new_edges - new_hedges)
        hypergraph.update_parallel_hedges_groups(new_hedges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        
        return modified, degree_3_features if return_features else None
    
    # Work on a separate Hypergraph object: a Hypergraph input is copied,
    # any other input is converted.
    if type(graph) is not Hypergraph:
        hypergraph = Hypergraph(graph)
    else:
        hypergraph = graph.copy()
    
    features = []
    treewidth = 0
    
    if hypergraph.number_of_nodes() == 0:
        if return_features:
            return treewidth, "", features
        else:
            return treewidth, ""
    
    # Features reduced in the previous iteration; they are appended to
    # features at the beginning of the next iteration.
    new_features = []
            
    while True:
        modified = False
        
        if return_features:
            features += new_features
        
#         hypergraph.visualize()
        
        # no need to check if modified here to continue, just go to the next rule after
        rule_0(hypergraph, compute_string)

        modified, new_features = rule_1(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 1:
                treewidth = 1
            continue

        modified, new_features = rule_2(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 2:
                treewidth = 2
            continue
        
        # Rule 3 is applied only when the canonical string is computed.
        if compute_string:
            modified = rule_3(hypergraph)
            if modified:
                # Rule 3 reduces no features; reset the list so that the
                # features of rule 2 are not collected again.
                new_features = []
                continue

        modified, new_features = rules_4_5_6_7(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 3:
                treewidth = 3
            continue
        else:
            # No rule was applicable in this iteration.
            if is_done(hypergraph):
                if hypergraph.number_of_nodes() == 0:
                    sys.stderr.write("\n[ArnborgProskurowski] Error: empty graph produced.")
                    if return_features:
                        return treewidth, u"", features
                    else:
                        return treewidth, u""
                else:
                    canon_str = collect_labels(hypergraph) if compute_string else u""
                    if return_features:
                        features += new_features
                        return treewidth, canon_str, features
                    else:
                        return treewidth, canon_str
            else:
                # Edges are left but nothing can be reduced anymore.
                if return_features:
                    features += new_features
                    return -1, u"Tree-width > 3", features
                else:
                    return -1, u"Tree-width > 3"
Ejemplo n.º 40
0
 def rule_0(hypergraph, compute_string):
     '''Normalize the hypergraph before the other reduction rules are applied.

     Self-loops are removed, nodes with more than one label get a single
     merged label (rule 0.1, only if compute_string) and every group of
     parallel edges is replaced by a single edge (rule 0.2).

     :param hypergraph: The Hypergraph to modify in place.
     :param compute_string: If True, the labels needed for the canonical
     string are computed; otherwise the edges are only removed/merged.
     :return: True if the hypergraph was modified, False otherwise.
     '''
     # (originally 1.3) - remove self-loops
     loops = list(hypergraph.self_loops)
     modified = len(loops) > 0
     if not compute_string:
         hypergraph.remove_edges_from(loops, unsafe=True)
     else:
         # the first label of the loop is moved to its node
         for loop in loops:
             loop_node = hypergraph.endpoints(loop)[0]
             loop_label = hypergraph.edge(loop)["labels"][0]
             hypergraph.add_node_label(loop_node, loop_label)
             hypergraph.remove_edge(loop)
     
     # rule 0.1
     if compute_string:
         multi_label_nodes = list(hypergraph.nodes_with_more_labels)
         if multi_label_nodes:
             modified = True
         for node in multi_label_nodes:
             node_labels = hypergraph.node[node]["labels"]
             node_labels.sort()
             merged_label = u"(0.1;{0})".format(u",".join(node_labels))
             hypergraph.set_node_labels(node, [merged_label])
         hypergraph.reset_nodes_with_more_labels()
     
     # rule 0.2
     group_keys = list(hypergraph.parallel_edges_groups.keys())
     if group_keys:
         modified = True
     
     for group_key in group_keys:
         group = list(hypergraph.parallel_edges_groups[group_key])
         endpoints = hypergraph.endpoints(group[0])
         if not compute_string:
             hypergraph.remove_edges_from(group, unsafe=True)
             hypergraph.add_edge(endpoints, set(), "")
             continue
         
         # one candidate (label, permutation) per permutation of the endpoints
         candidates = []
         for perm in permutations(endpoints):
             edge_strings = []
             for edge in group:
                 edge_strings.append(Hypergraph.edge_to_string(hypergraph, edge, perm))
             edge_strings.sort()
             candidates.append((u"(0.2;{0})".format(u",".join(edge_strings)), perm))
         
         # the minimal label wins; all permutations yielding it form the direction
         minimal_label = min(label for label, _ in candidates)
         direction = set(perm for label, perm in candidates if label == minimal_label)
         hypergraph.remove_edges_from(group, unsafe=True)
         hypergraph.add_edge(endpoints, direction, minimal_label)
     
     hypergraph.reset_parallel_edges_groups()
     
     return modified
Ejemplo n.º 41
0
 def testHypergraph_edges_iter(self):
     '''Check edges_iter without arguments, for one node and for a pair of nodes.'''
     graph = Hypergraph(example_graphs.gt_dummy_graph)
     
     all_edges = list(graph.edges_iter())
     self.assertEqual(len(all_edges), 32)
     
     edges_of_n_6 = set(graph.edges_iter("n_6"))
     self.assertEqual(edges_of_n_6, set(["e_5", "e_9", "e_13", "e_28"]))
     
     edges_n_5_n_1 = set(graph.edges_iter("n_5", "n_1"))
     self.assertEqual(edges_n_5_n_1, set(["e_15"]))
Ejemplo n.º 42
0
# Configuration of the leave-one-out cross-validation run.

# Key into helpers.datasets selecting the input files.
dataset = "drugadmin"
wl_iter_range = [3] # range(0, 10)
# Pairs passed as k_L_range to crossval.loo_crossval (only used by the
# commented-out call below). NOTE(review): presumably the sketch matrix
# parameters (k, L) - confirm. The trailing comment of each pair gives the
# inflection point it corresponds to.
k_L_range = [
    (20, 1),    # inflection point ~0.
    (15, 5),    # inflection point 0.1
    (10, 9),    # inflection point 0.2
    (7, 12),    # inflection point 0.3
    (5, 13),    # inflection point 0.4
    (4, 16),    # inflection point 0.5
    (3, 16),    # inflection point 0.6
    (2, 11),    # inflection point 0.7
    (2, 25),    # inflection point 0.8
    (1, 10),    # inflection point 0.9
    (1, 20),    # inflection point ~1.
]
# Inflection points passed as infl_point_range to crossval.loo_crossval.
infl_point_range = [0., 0.0000001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]
# Not referenced in the visible part of this script.
p_range = [1]
# Candidate radii of the r-balls (incoming, outgoing and all edges).
r_in_range = [3] # range(0, 4)
r_out_range = [2] # range(0, 4)
r_all_range = [0]

output_dir = "../output_rdf/crossval_test/"

if __name__ == '__main__':
    # Load the RDF files of the dataset, convert them to a hypergraph and
    # run the cross-validation over the parameter ranges defined above.
    in_files = helpers.datasets[dataset]["files"]
    graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False)
    hypergraph = Hypergraph(graph)
    best_model = crossval.loo_crossval(hypergraph, wl_iter_range, r_in_range, r_out_range, r_all_range, output_dir, infl_point_range=infl_point_range)
    # Alternative: search over explicit k_L_range pairs instead of inflection points.
#     best_model = crossval.loo_crossval(hypergraph, wl_iter_range, r_in_range, r_out_range, r_all_range, output_dir, k_L_range=k_L_range)
    print "Best model:", best_model
Ejemplo n.º 43
0
 def testHypergraph_Copy(self):
     '''Check that a copy of a hypergraph compares equal to the original.'''
     source_hypergraph = Hypergraph(example_graphs.gt_dummy_graph)
     copied_hypergraph = source_hypergraph.copy()
     self.assertEqual(
         source_hypergraph, copied_hypergraph, "The copy was not correct.")