def testSimilarNodesMining(self):
    # Checks that the node similarity matrix derived from a SketchMatrix
    # equals the one obtained by thresholding the Jaccard similarity matrix
    # of the CharacteristicMatrix at 0.8.
    failure_note = "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."

    graph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        graph, r_in=3, r_out=2, r_all=0)
    characteristic = CharacteristicMatrix(
        rballs, graph.number_of_nodes(), wl_iterations=0)

    # Reference result: 1.0 where the Jaccard similarity is >= 0.8, else 0.0.
    jaccard = characteristic.compute_jaccard_similarity_matrix()
    expected = np.array(jaccard >= 0.8, dtype=np.float32)

    # NOTE(review): 25 and 265 are presumably sketch parameters chosen to
    # match the 0.8 threshold above -- confirm against SketchMatrix.
    sketch = SketchMatrix(25, 265, characteristic)
    actual = similar_nodes_mining.get_node_similarity_matrix(sketch)

    # Element-wise comparison; every entry has to agree.
    matches = expected == actual
    self.assertTrue(matches.all(), failure_note)
def testSimilarNodesMining(self):
    # Compares the sketch-based node similarity matrix against the matrix
    # obtained by thresholding the exact Jaccard similarities at 0.8.
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    database, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    node_total = hypergraph.number_of_nodes()
    char_matrix = CharacteristicMatrix(database, node_total, wl_iterations=0)

    # Expected matrix: 1.0 for Jaccard similarity >= 0.8, otherwise 0.0.
    expected_similarity = np.array(
        char_matrix.compute_jaccard_similarity_matrix() >= 0.8,
        dtype=np.float32)

    # NOTE(review): the meaning of 25 and 265 is not visible here; they are
    # presumably the sketch dimensions -- confirm against SketchMatrix.
    computed_similarity = similar_nodes_mining.get_node_similarity_matrix(
        SketchMatrix(25, 265, char_matrix))

    all_equal = (expected_similarity == computed_similarity).all()
    self.assertTrue(
        all_equal,
        "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
    )
print "Reading Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" return sketch_matrix, index_node_map, node_id_map if __name__ == '__main__': ch_matrix, hypergraph, index_node_map, node_id_map = calculate_ch_matrix() # ch_matrix, hypergraph, index_node_map, node_id_map = load_ch_matrix() sketch_matrix = calculate_sketch_matrix(ch_matrix, hypergraph) # sketch_matrix, index_node_map, node_id_map = load_sketch_matrix() print "Building similarity matrix started at", time.strftime(time_format) start = time.time() sim_mat = similar_nodes_mining.get_node_similarity_matrix(sketch_matrix) print "Building similarity matrix took", time.time() - start, "s" print "-----------------------------------------" print "Extracting similar nodes started at", time.strftime(time_format) start = time.time() similar_nodes = similar_nodes_mining.get_all_similar_nodes( sim_mat, index_node_map) print "Extracting similar nodes took", time.time() - start, "s" print "-----------------------------------------" print "Saving similar nodes started at", time.strftime(time_format) start = time.time() inout.save_to_file(similar_nodes, path + "{0}_similar_nodes".format(dataset)) print "Saving similar nodes took", time.time() - start, "s"
index_node_map = inout.load_from_file(path + "{0}_index_node_map".format(dataset)) print "Reading Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" return sketch_matrix, index_node_map, node_id_map if __name__ == '__main__': ch_matrix, hypergraph, index_node_map, node_id_map = calculate_ch_matrix() # ch_matrix, hypergraph, index_node_map, node_id_map = load_ch_matrix() sketch_matrix = calculate_sketch_matrix(ch_matrix, hypergraph) # sketch_matrix, index_node_map, node_id_map = load_sketch_matrix() print "Building similarity matrix started at", time.strftime(time_format) start = time.time() sim_mat = similar_nodes_mining.get_node_similarity_matrix(sketch_matrix) print "Building similarity matrix took", time.time() - start, "s" print "-----------------------------------------" print "Extracting similar nodes started at", time.strftime(time_format) start = time.time() similar_nodes = similar_nodes_mining.get_all_similar_nodes(sim_mat, index_node_map) print "Extracting similar nodes took", time.time() - start, "s" print "-----------------------------------------" print "Saving similar nodes started at", time.strftime(time_format) start = time.time() inout.save_to_file(similar_nodes, path + "{0}_similar_nodes".format(dataset)) print "Saving similar nodes took", time.time() - start, "s" print "-----------------------------------------"