def loo_crossval_naive(graph_database, wl_iter_range, param_2_range, quality_function, output_dir, base_model={}, shingles_type="features", window_size=5, accumulate_wl_shingles=True):
    '''Grid-search wl_iterations and a second parameter using exact Jaccard similarities.

    Similar to loo_crossval_sketch but computes directly the Jaccard
    similarities between the columns in the characteristic matrix, without
    using a sketch matrix. Not applicable for big datasets.

    For every value in wl_iter_range a CharacteristicMatrix is built and its
    Jaccard similarity matrix computed. For every value p in param_2_range
    the quality_function is evaluated once per column and averaged. Each
    resulting model is printed and appended to the file
    output_dir + "models_naive".

    Parameters:
        graph_database: collection handed to CharacteristicMatrix; its len()
            is used as the number of columns.
        wl_iter_range: iterable of wl_iterations values to try.
        param_2_range: iterable of values passed as the third argument of
            quality_function and of model_p.
        quality_function: callable invoked as
            quality_function(i, jaccard_similarity_matrix, p); its result is
            converted with float().
        output_dir: string prefix of the output file. It is concatenated
            verbatim with "models_naive", so it should end with a path
            separator.
        base_model: forwarded to model_p. When it is falsy (e.g. empty), a
            final "Best model: ..." line is also written to the file.
        shingles_type, window_size, accumulate_wl_shingles: forwarded
            unchanged to CharacteristicMatrix.

    Returns:
        The model with the strictly highest average quality (the first one
        wins ties), or the initial model_p(-1, -1, -1, ...) placeholder when
        no evaluated model exceeds its "quality" entry.
    '''
    # NOTE(review): the default for base_model is a shared dict. This function
    # only reads it; model_p is assumed not to mutate it - TODO confirm.
    best_model = model_p(-1, -1, -1, base_model=base_model)
    cols_count = len(graph_database)

    # The context manager guarantees the file is closed even when building
    # the matrix or evaluating the quality function raises.
    with open(output_dir + "models_naive", "a") as models_file:
        for wl_iterations in wl_iter_range:
            ch_matrix = CharacteristicMatrix(
                graph_database,
                cols_count,
                wl_iterations=wl_iterations,
                shingles_type=shingles_type,
                window_size=window_size,
                accumulate_wl_shingles=accumulate_wl_shingles)
            jaccard_similarity_matrix = ch_matrix.compute_jaccard_similarity_matrix()

            for p in param_2_range:
                # Mean quality over all columns; the 0. start value keeps the
                # accumulation in floating point.
                avg_quality = sum(
                    (float(quality_function(i, jaccard_similarity_matrix, p))
                     for i in range(cols_count)),
                    0.)
                avg_quality /= cols_count

                current_model = model_p(avg_quality, wl_iterations, p, base_model=base_model)
                # Parenthesised form prints identically on Python 2 and is
                # also valid syntax on Python 3.
                print(current_model)
                models_file.write(str(current_model) + ",\n")
                # Flush after every model so partial results survive a crash.
                models_file.flush()

                if avg_quality > best_model["quality"]:
                    best_model = current_model

        if not base_model:
            # print best model when there are no outer parameters
            models_file.write("Best model: " + str(best_model) + "\n")

    return best_model
def testCharacteristicMatrix_JaccardSimMatrix(self):
    '''Compare the Jaccard similarity matrix of the dummy graph with the expected one.

    Builds the r-balls database of example_graphs.snm_dummy_graph
    (r_in=3, r_out=2, r_all=0), constructs a CharacteristicMatrix with
    wl_iterations=0 and requires its Jaccard similarity matrix to be
    element-wise equal to self.ch_matrix_jaccard_sim_exp.
    '''
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    # Only the first element of the returned pair is needed here.
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)

    characteristic = CharacteristicMatrix(
        rballs, hypergraph.number_of_nodes(), wl_iterations=0)
    computed = characteristic.compute_jaccard_similarity_matrix()

    # Element-wise comparison; every entry has to match exactly.
    matches = self.ch_matrix_jaccard_sim_exp == computed
    self.assertTrue(matches.all(), "The computed Jaccard similarity matrix is wrong.")
def testSimilarNodesMining(self):
    '''Compare the sketch-based node similarity matrix with the exact one.

    The expected matrix is obtained by thresholding the exact Jaccard
    similarity matrix of the dummy graph at >= 0.8 (as float32). The actual
    matrix comes from similar_nodes_mining.get_node_similarity_matrix applied
    to a SketchMatrix built from the same characteristic matrix. Per the
    assertion message, the sketch is probabilistic, so this test can fail
    sporadically.
    '''
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    # Only the first element of the returned pair is needed here.
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)

    characteristic = CharacteristicMatrix(
        rballs, hypergraph.number_of_nodes(), wl_iterations=0)
    exact_jaccard = characteristic.compute_jaccard_similarity_matrix()
    expected = np.array(exact_jaccard >= 0.8, dtype=np.float32)

    # NOTE(review): the meaning of 25 and 265 is defined by SketchMatrix,
    # which is not visible here - TODO confirm.
    sketch = SketchMatrix(25, 265, characteristic)
    actual = similar_nodes_mining.get_node_similarity_matrix(sketch)

    self.assertTrue(
        (expected == actual).all(),
        "The computed similarity matrix is wrong (Keep in mind that the "
        "sketch_matrix is probabilistic, therefore, it may not be always "
        "correct. The test may pass in another run.).")
def testCharacteristicMatrix_JaccardSimMatrix(self):
    '''Compare the Jaccard similarity matrix of the dummy graph with the expected one.

    Builds the r-balls database of example_graphs.snm_dummy_graph
    (r_in=3, r_out=2, r_all=0), constructs a CharacteristicMatrix with
    wl_iterations=0 and requires its Jaccard similarity matrix to be
    element-wise equal to self.ch_matrix_jaccard_sim_exp.
    '''
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    # Only the first element of the returned pair is needed here.
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)

    characteristic = CharacteristicMatrix(
        rballs, hypergraph.number_of_nodes(), wl_iterations=0)
    computed = characteristic.compute_jaccard_similarity_matrix()

    # Element-wise comparison; every entry has to match exactly.
    matches = self.ch_matrix_jaccard_sim_exp == computed
    self.assertTrue(matches.all(), "The computed Jaccard similarity matrix is wrong.")
def loo_crossval_naive(graph_database, wl_iter_range, param_2_range, quality_function, output_dir, base_model={}, shingles_type="features", window_size=5, accumulate_wl_shingles=True):
    '''Grid-search wl_iterations and a second parameter using exact Jaccard similarities.

    Similar to loo_crossval_sketch but computes directly the Jaccard
    similarities between the columns in the characteristic matrix, without
    using a sketch matrix. Not applicable for big datasets.

    For every value in wl_iter_range a CharacteristicMatrix is built and its
    Jaccard similarity matrix computed. For every value p in param_2_range
    the quality_function is evaluated once per column and averaged. Each
    resulting model is printed and appended to the file
    output_dir + "models_naive".

    Parameters:
        graph_database: collection handed to CharacteristicMatrix; its len()
            is used as the number of columns.
        wl_iter_range: iterable of wl_iterations values to try.
        param_2_range: iterable of values passed as the third argument of
            quality_function and of model_p.
        quality_function: callable invoked as
            quality_function(i, jaccard_similarity_matrix, p); its result is
            converted with float().
        output_dir: string prefix of the output file. It is concatenated
            verbatim with "models_naive", so it should end with a path
            separator.
        base_model: forwarded to model_p. When it is falsy (e.g. empty), a
            final "Best model: ..." line is also written to the file.
        shingles_type, window_size, accumulate_wl_shingles: forwarded
            unchanged to CharacteristicMatrix.

    Returns:
        The model with the strictly highest average quality (the first one
        wins ties), or the initial model_p(-1, -1, -1, ...) placeholder when
        no evaluated model exceeds its "quality" entry.
    '''
    # NOTE(review): the default for base_model is a shared dict. This function
    # only reads it; model_p is assumed not to mutate it - TODO confirm.
    best_model = model_p(-1, -1, -1, base_model=base_model)
    cols_count = len(graph_database)

    # The context manager guarantees the file is closed even when building
    # the matrix or evaluating the quality function raises.
    with open(output_dir + "models_naive", "a") as models_file:
        for wl_iterations in wl_iter_range:
            ch_matrix = CharacteristicMatrix(
                graph_database,
                cols_count,
                wl_iterations=wl_iterations,
                shingles_type=shingles_type,
                window_size=window_size,
                accumulate_wl_shingles=accumulate_wl_shingles)
            jaccard_similarity_matrix = ch_matrix.compute_jaccard_similarity_matrix()

            for p in param_2_range:
                # Mean quality over all columns; the 0. start value keeps the
                # accumulation in floating point.
                avg_quality = sum(
                    (float(quality_function(i, jaccard_similarity_matrix, p))
                     for i in range(cols_count)),
                    0.)
                avg_quality /= cols_count

                current_model = model_p(avg_quality, wl_iterations, p, base_model=base_model)
                # Parenthesised form prints identically on Python 2 and is
                # also valid syntax on Python 3.
                print(current_model)
                models_file.write(str(current_model) + ",\n")
                # Flush after every model so partial results survive a crash.
                models_file.flush()

                if avg_quality > best_model["quality"]:
                    best_model = current_model

        if not base_model:
            # print best model when there are no outer parameters
            models_file.write("Best model: " + str(best_model) + "\n")

    return best_model
def testSimilarNodesMining(self):
    '''Compare the sketch-based node similarity matrix with the exact one.

    The expected matrix is obtained by thresholding the exact Jaccard
    similarity matrix of the dummy graph at >= 0.8 (as float32). The actual
    matrix comes from similar_nodes_mining.get_node_similarity_matrix applied
    to a SketchMatrix built from the same characteristic matrix. Per the
    assertion message, the sketch is probabilistic, so this test can fail
    sporadically.
    '''
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    # Only the first element of the returned pair is needed here.
    rballs, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)

    characteristic = CharacteristicMatrix(
        rballs, hypergraph.number_of_nodes(), wl_iterations=0)
    exact_jaccard = characteristic.compute_jaccard_similarity_matrix()
    expected = np.array(exact_jaccard >= 0.8, dtype=np.float32)

    # NOTE(review): the meaning of 25 and 265 is defined by SketchMatrix,
    # which is not visible here - TODO confirm.
    sketch = SketchMatrix(25, 265, characteristic)
    actual = similar_nodes_mining.get_node_similarity_matrix(sketch)

    self.assertTrue(
        (expected == actual).all(),
        "The computed similarity matrix is wrong (Keep in mind that the "
        "sketch_matrix is probabilistic, therefore, it may not be always "
        "correct. The test may pass in another run.).")