def testGetSimilarNodesToQueryNode(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=0)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similar_nodes_exp = np.array([0, 5, 7])
     similar_nodes, _ = similar_nodes_mining.get_similar_nodes(
         "n_7",
         dummy_hypergraph,
         sketch_matrix,
         0, [],
         r_in=3,
         r_out=2,
         r_all=0)
     equality = similar_nodes_exp == similar_nodes
     if type(equality) is not bool:
         equality = equality.all()
     self.assertTrue(
         equality,
         "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
     )
 def testCharacteristicMatrix_JaccardSimMatrix(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
     ch_matrix_jaccard_sim = ch_matrix.compute_jaccard_similarity_matrix()
     equality = (self.ch_matrix_jaccard_sim_exp == ch_matrix_jaccard_sim).all()
     self.assertTrue(equality, "The computed Jaccard similarity matrix is wrong.")
 def testCharacteristicMatrix_ReadWrite(self):
     file_name = "test_files/characteristic_matrix.tmp"
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=2, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=4)
     ch_matrix.save_to_file(file_name)
     read_ch_matrix = CharacteristicMatrix.load_from_file(file_name)
     self.assertEqual(read_ch_matrix, ch_matrix, "The read characteristic matrix is different from the saved one.")
def calculate_ch_matrix():
    in_files = helpers.datasets[dataset]["files"]

    print "Converting RDF to NetworkX graph started at", time.strftime(
        time_format)
    start = time.time()
    graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files,
                                                     discard_classes=False)
    print "Converting RDF to NetworkX graph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving NodeID map started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset))
    print "Saving NodeID map took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Building hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph = Hypergraph(graph)
    print "Building hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph.save_to_file(path + "{0}_hgraph".format(dataset))
    print "Saving hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Building characteristic matrix started at", time.strftime(
        time_format)
    start = time.time()
    rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=r_in, r_out=r_out, r_all=r_all)
    ch_matrix = CharacteristicMatrix(rballs_database,
                                     hypergraph.number_of_nodes(),
                                     wl_iterations=wl_iterations,
                                     print_progress=True)
    print "Building characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving Column index to Node map started at", time.strftime(
        time_format)
    start = time.time()
    inout.save_to_file(index_node_map,
                       path + "{0}_index_node_map".format(dataset))
    print "Saving Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"

    print "Saving characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset))
    print "Saving characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"

    return ch_matrix, hypergraph, index_node_map, node_id_map
 def testCharacteristicMatrix(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=0)
     self.assertEqual(self.raw_ch_matrix_exp, ch_matrix.sparse_matrix,
                      "The computed characteristic matrix is wrong.")
 def testSketchMatrix_ReadWrite(self):
     file_name = "test_files/sketch_matrix.tmp"
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=2, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=4)
     sketch_matrix = SketchMatrix(5, 20, ch_matrix)
     sketch_matrix.save_to_file(file_name)
     read_sketch_matrix = SketchMatrix.load_from_file(file_name)
     equality = (read_sketch_matrix.matrix == sketch_matrix.matrix).all()
     self.assertTrue(equality, "The read sketch matrix is different from the saved one.")
 def testSimilarNodesMining(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
     ch_matrix_jaccard_sim = ch_matrix.compute_jaccard_similarity_matrix()
     similarity_matrix_exp = np.array(ch_matrix_jaccard_sim >= 0.8, dtype=np.float32)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similarity_matrix = similar_nodes_mining.get_node_similarity_matrix(sketch_matrix)
     equality = (similarity_matrix_exp == similarity_matrix).all()
     self.assertTrue(equality, "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
 def testGetSimilarNodesToQueryNode(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similar_nodes_exp = np.array([0, 5, 7])
     similar_nodes, _ = similar_nodes_mining.get_similar_nodes("n_7", dummy_hypergraph, sketch_matrix, 0, [], r_in=3, r_out=2, r_all=0)
     equality = similar_nodes_exp == similar_nodes
     if type(equality) is not bool:
         equality = equality.all()
     self.assertTrue(equality, "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
 def testCharacteristicMatrix_JaccardSimMatrix(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=0)
     ch_matrix_jaccard_sim = ch_matrix.compute_jaccard_similarity_matrix()
     equality = (
         self.ch_matrix_jaccard_sim_exp == ch_matrix_jaccard_sim).all()
     self.assertTrue(equality,
                     "The computed Jaccard similarity matrix is wrong.")
 def testCharacteristicMatrix_ReadWrite(self):
     file_name = "test_files/characteristic_matrix.tmp"
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=2, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=4)
     ch_matrix.save_to_file(file_name)
     read_ch_matrix = CharacteristicMatrix.load_from_file(file_name)
     self.assertEqual(
         read_ch_matrix, ch_matrix,
         "The read characteristic matrix is different from the saved one.")
 def testSketchMatrix_ReadWrite(self):
     file_name = "test_files/sketch_matrix.tmp"
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=2, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=4)
     sketch_matrix = SketchMatrix(5, 20, ch_matrix)
     sketch_matrix.save_to_file(file_name)
     read_sketch_matrix = SketchMatrix.load_from_file(file_name)
     equality = (read_sketch_matrix.matrix == sketch_matrix.matrix).all()
     self.assertTrue(
         equality,
         "The read sketch matrix is different from the saved one.")
예제 #12
0
def calculate_ch_matrix():
    in_files = helpers.datasets[dataset]["files"]
    
    print "Converting RDF to NetworkX graph started at", time.strftime(time_format)
    start = time.time()
    graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False)
    print "Converting RDF to NetworkX graph took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving NodeID map started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset))
    print "Saving NodeID map took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Building hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph = Hypergraph(graph)
    print "Building hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving hypergraph started at", time.strftime(time_format)
    start = time.time()
    hypergraph.save_to_file(path + "{0}_hgraph".format(dataset))
    print "Saving hypergraph took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Building characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=r_in, r_out=r_out, r_all=r_all)
    ch_matrix = CharacteristicMatrix(rballs_database, hypergraph.number_of_nodes(), wl_iterations=wl_iterations, print_progress=True)
    print "Building characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving Column index to Node map started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(index_node_map, path + "{0}_index_node_map".format(dataset))
    print "Saving Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving characteristic matrix started at", time.strftime(time_format)
    start = time.time()
    ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset))
    print "Saving characteristic matrix took", time.time() - start, "s"
    print "-----------------------------------------"
    
    return ch_matrix, hypergraph, index_node_map, node_id_map
 def testSimilarNodesMining(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(
         dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database,
                                      nodes_count,
                                      wl_iterations=0)
     ch_matrix_jaccard_sim = ch_matrix.compute_jaccard_similarity_matrix()
     similarity_matrix_exp = np.array(ch_matrix_jaccard_sim >= 0.8,
                                      dtype=np.float32)
     sketch_matrix = SketchMatrix(25, 265, ch_matrix)
     similarity_matrix = similar_nodes_mining.get_node_similarity_matrix(
         sketch_matrix)
     equality = (similarity_matrix_exp == similarity_matrix).all()
     self.assertTrue(
         equality,
         "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
     )
예제 #14
0
def loo_crossval(hypergraph, wl_iter_range, r_in_range, r_out_range, r_all_range, output_dir, k_L_range=None, infl_point_range=None, p_range=None):
    best_model = sgm_crossval.model(-1, -1, base_model=model(-1, -1, -1))
    
    for r_in in r_in_range:
        for r_out in r_out_range:
            for r_all in r_all_range:
                base_model = model(r_in, r_out, r_all)
                rballs_database, _ = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=r_in, r_out=r_out, r_all=r_all, center_default_color=True)
                rballs_database = [(r_id, list(graphs), t) for r_id, graphs, t in rballs_database] # execute generator
                if k_L_range:
                    current_model = sgm_crossval.loo_crossval_sketch(rballs_database, wl_iter_range, k_L_range, output_dir, base_model=base_model)
                elif infl_point_range:
                    current_model = sgm_crossval.loo_crossval_threshold(rballs_database, wl_iter_range, infl_point_range, output_dir, base_model=base_model)
                else:
                    current_model = sgm_crossval.loo_crossval_pnn(rballs_database, wl_iter_range, p_range, output_dir, base_model=base_model)
                if current_model["quality"] > best_model["quality"]:
                    best_model = current_model
    
    models_file = open(output_dir + "models", "a")
    models_file.write(str(best_model) + ",\n")
    models_file.close()
    
    return best_model
 def testCharacteristicMatrix(self):
     dummy_hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs_database, _ = similar_nodes_mining.extract_rballs_database(dummy_hypergraph, r_in=3, r_out=2, r_all=0)
     nodes_count = dummy_hypergraph.number_of_nodes()
     ch_matrix = CharacteristicMatrix(rballs_database, nodes_count, wl_iterations=0)
     self.assertEqual(self.raw_ch_matrix_exp, ch_matrix.sparse_matrix, "The computed characteristic matrix is wrong.")