def testSimilarNodesMining(self):
    """Compare the sketch-based node similarity matrix with the exact one.

    The expected matrix is obtained by thresholding the Jaccard similarity
    matrix of the characteristic matrix at 0.8. The actual matrix comes from
    ``similar_nodes_mining.get_node_similarity_matrix`` applied to a
    ``SketchMatrix`` built over the same characteristic matrix. As the
    assertion message itself states, the sketch is probabilistic, so this
    check may fail occasionally and pass on a re-run.
    """
    graph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_db, _ = similar_nodes_mining.extract_rballs_database(graph, r_in=3, r_out=2, r_all=0)
    characteristic = CharacteristicMatrix(rballs_db, graph.number_of_nodes(), wl_iterations=0)

    # Exact reference: 1.0 where the Jaccard similarity reaches 0.8, else 0.0.
    exact_jaccard = characteristic.compute_jaccard_similarity_matrix()
    expected = np.array(exact_jaccard >= 0.8, dtype=np.float32)

    # Approximation under test (meaning of 25 and 265 is defined by
    # SketchMatrix's constructor -- TODO confirm against its signature).
    sketch = SketchMatrix(25, 265, characteristic)
    actual = similar_nodes_mining.get_node_similarity_matrix(sketch)

    self.assertTrue(
        (expected == actual).all(),
        "The computed similarity matrix is wrong "
        "(Keep in mind that the sketch_matrix is probabilistic, therefore, "
        "it may not be always correct. The test may pass in another run.)."
    )
Ejemplo n.º 2
0
    index_node_map = inout.load_from_file(path + "{0}_index_node_map".format(dataset))
    print "Reading Column index to Node map took", time.time() - start, "s"
    print "-----------------------------------------"
    
    return sketch_matrix, index_node_map, node_id_map

if __name__ == '__main__':
    ch_matrix, hypergraph, index_node_map, node_id_map = calculate_ch_matrix()
#     ch_matrix, hypergraph, index_node_map, node_id_map = load_ch_matrix()
    
    sketch_matrix = calculate_sketch_matrix(ch_matrix, hypergraph)
#     sketch_matrix, index_node_map, node_id_map = load_sketch_matrix()
    
    print "Building similarity matrix started at", time.strftime(time_format)
    start = time.time()
    sim_mat = similar_nodes_mining.get_node_similarity_matrix(sketch_matrix)
    print "Building similarity matrix took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Extracting similar nodes started at", time.strftime(time_format)
    start = time.time()
    similar_nodes = similar_nodes_mining.get_all_similar_nodes(sim_mat, index_node_map)
    print "Extracting similar nodes took", time.time() - start, "s"
    print "-----------------------------------------"
    
    print "Saving similar nodes started at", time.strftime(time_format)
    start = time.time()
    inout.save_to_file(similar_nodes, path + "{0}_similar_nodes".format(dataset))
    print "Saving similar nodes took", time.time() - start, "s"
    print "-----------------------------------------"