def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
    # initialize ged env.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'],
                          edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()

    node_labels = ged_env.get_all_node_labels()
    edge_labels = ged_env.get_all_edge_labels()
    node_label_costs = label_costs_to_matrix(
        options['node_label_costs'],
        len(node_labels)) if 'node_label_costs' in options else None
    edge_label_costs = label_costs_to_matrix(
        options['edge_label_costs'],
        len(edge_labels)) if 'edge_label_costs' in options else None
    ged_env.set_label_costs(node_label_costs, edge_label_costs)
    ged_env.init(init_type=options['init_option'])
    if parallel:
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # compute ged.
    # options used to compute numbers of edit operations.
    neo_options = {
        'edit_cost': options['edit_cost'],
        'is_cml': True,
        'node_labels': node_labels,
        'edge_labels': edge_labels
    }
    ged_mat = np.zeros((len(graphs), len(graphs)))
    if parallel:
        len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
        ged_vec = [0 for i in range(len_itr)]
        n_edit_operations = [0 for i in range(len_itr)]
        itr = combinations(range(0, len(graphs)), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = int(len_itr / n_jobs) + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs, initializer=init_worker,
                    initargs=(graphs, ged_env, listID))
        if verbose:
            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                            desc='computing GEDs', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
        for i, j, dis, n_eo_tmp in iterator:
            idx_itr = int(len(graphs) * i + j - (i + 1) * (i + 2) / 2)
            ged_vec[idx_itr] = dis
            ged_mat[i][j] = dis
            ged_mat[j][i] = dis
            n_edit_operations[idx_itr] = n_eo_tmp
        pool.close()
        pool.join()
    else:
        ged_vec = []
        n_edit_operations = []
        if verbose:
            iterator = tqdm(range(len(graphs)), desc='computing GEDs',
                            file=sys.stdout)
        else:
            iterator = range(len(graphs))
        for i in iterator:
            for j in range(i + 1, len(graphs)):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(
                        ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    dis, pi_backward, pi_forward = _compute_ged(
                        ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j],
                                                  pi_forward, pi_backward,
                                                  **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
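# A minimal usage sketch for compute_geds_cml, not taken from the original source:
# it assumes the function is importable from gklearn.ged.util.util and that the
# option keys below match those read inside the function ('edit_cost',
# 'edit_cost_constants', 'init_option', 'method', plus any method-specific
# settings such as 'threads'); the dataset name and cost values are illustrative.
if __name__ == '__main__':
    from gklearn.utils import Dataset
    from gklearn.ged.util.util import compute_geds_cml  # assumed location.

    dataset = Dataset()
    dataset.load_predefined_dataset('MUTAG')
    options = {
        'edit_cost': 'CONSTANT',                     # edit cost function.
        'edit_cost_constants': [3, 3, 1, 3, 3, 1],   # example edit costs.
        'init_option': 'LAZY_WITHOUT_SHUFFLED_COPIES',
        'method': 'BIPARTITE',                       # GED heuristic.
        'threads': 1
    }
    # Compute pairwise GEDs on a small subset of the dataset.
    ged_vec, ged_mat, n_edit_operations = compute_geds_cml(
        dataset.graphs[0:10], options=options, parallel=False, verbose=True)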
from gklearn.utils import Dataset

# Predefined dataset name, use dataset "MUTAG".
ds_name = 'MUTAG'
# Initialize a Dataset.
dataset = Dataset()
# Load predefined dataset "MUTAG".
dataset.load_predefined_dataset(ds_name)
graph1 = dataset.graphs[0]
graph2 = dataset.graphs[1]
print(graph1, graph2)

"""**2. Compute graph edit distance.**"""

from gklearn.ged.env import GEDEnv

ged_env = GEDEnv()  # initialize GED environment.
ged_env.set_edit_cost(
    'CONSTANT',  # GED cost type.
    edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
)
ged_env.add_nx_graph(graph1, '')  # add graph1
ged_env.add_nx_graph(graph2, '')  # add graph2
listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
options = {
    'initialization_method': 'RANDOM',  # or 'NODE', etc.
    'threads': 1  # parallel threads.
}
ged_env.set_method(
    'BIPARTITE',  # GED method.
    options  # options for GED method.
)
ged_env.init_method()  # initialize GED method.
ged_env.run_method(listID[0], listID[1])  # run.

pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
dis = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.
def test_GEDEnv():
    """Test GEDEnv.
    """
    """**1. Get dataset.**"""

    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'
    # Initialize a Dataset.
    dataset = Dataset()
    # Load predefined dataset "MUTAG".
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    """**2. Compute graph edit distance.**"""
    try:
        from gklearn.ged.env import GEDEnv

        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost(
            'CONSTANT',  # GED cost type.
            edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
        )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
        options = {
            'initialization_method': 'RANDOM',  # or 'NODE', etc.
            'threads': 1  # parallel threads.
        }
        ged_env.set_method(
            'BIPARTITE',  # GED method.
            options  # options for GED method.
        )
        ged_env.init_method()  # initialize GED method.
        ged_env.run_method(listID[0], listID[1])  # run.

        pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
        dis = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.

        import networkx as nx
        # Check both maps; the original single assert only used the second
        # comparison as the assertion message, so it was never evaluated.
        assert len(pi_forward) == nx.number_of_nodes(graph1)
        assert len(pi_backward) == nx.number_of_nodes(graph2)

    except Exception as exception:
        assert False, exception
def __gmg_bcu(self):
    """
    The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).

    Returns
    -------
    None.
    """
    # Set up the GED environment.
    ged_env = GEDEnv()  # @todo: maybe create a ged_env as a private variable.
    ged_env.set_edit_cost(self.__ged_options['edit_cost'],
                          edit_cost_constants=self.__edit_cost_constants)
    graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    graph_ids = ged_env.get_all_graph_ids()
    set_median_id = ged_env.add_graph('set_median')
    gen_median_id = ged_env.add_graph('gen_median')
    ged_env.init(init_type=self.__ged_options['init_option'])

    # Set up the median graph estimator.
    self.__mge = MedianGraphEstimatorPy(
        ged_env, constant_node_costs(self.__ged_options['edit_cost']))
    self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)

    options = self.__mge_options.copy()
    if 'seed' not in options:
        options['seed'] = int(round(time.time() * 1000))  # @todo: may not work correctly for possible parallel usage.
    options['parallel'] = self.__parallel

    # Select the GED algorithm.
    self.__mge.set_options(mge_options_to_string(options))
    self.__mge.set_label_names(node_labels=self._dataset.node_labels,
                               edge_labels=self._dataset.edge_labels,
                               node_attrs=self._dataset.node_attrs,
                               edge_attrs=self._dataset.edge_attrs)
    ged_options = self.__ged_options.copy()
    if self.__parallel:
        ged_options['threads'] = 1
    self.__mge.set_init_method(ged_options['method'], ged_options)
    self.__mge.set_descent_method(ged_options['method'], ged_options)

    # Run the estimator.
    self.__mge.run(graph_ids, set_median_id, gen_median_id)

    # Get SODs.
    self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
    self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')

    # Get median graphs.
    self.__set_median = ged_env.get_nx_graph(set_median_id)
    self.__gen_median = ged_env.get_nx_graph(gen_median_id)
def _compute_ged(dataset, node_label_costs, edge_label_costs):
    from gklearn.ged.env import GEDEnv
    from gklearn.ged.util.util import label_costs_to_matrix
    import networkx as nx
    import numpy as np

    ged_env = GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost(
        'CONSTANT',  # GED cost type.
        edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
    )
    for g in dataset.graphs:
        ged_env.add_nx_graph(g, '')  # add graphs
    node_labels = ged_env.get_all_node_labels()
    edge_labels = ged_env.get_all_edge_labels()
    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.set_label_costs(
        label_costs_to_matrix(node_label_costs, len(node_labels)),
        label_costs_to_matrix(edge_label_costs, len(edge_labels)))
    ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
    options = {
        'initialization_method': 'RANDOM',  # or 'NODE', etc.
        'threads': 1  # parallel threads.
    }
    ged_env.set_method(
        'BIPARTITE',  # GED method.
        options  # options for GED method.
    )
    ged_env.init_method()  # initialize GED method.
    ged_env.run_method(listID[0], listID[1])  # run.

    pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
    pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
    dis = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.

    # Convert map indices to node identifiers; removed/inserted nodes map to np.inf.
    nodes1 = [n for n in dataset.graphs[0].nodes()]
    nodes2 = [n for n in dataset.graphs[1].nodes()]
    nb1 = nx.number_of_nodes(dataset.graphs[0])
    nb2 = nx.number_of_nodes(dataset.graphs[1])
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return pi_forward, pi_backward, dis, node_labels, edge_labels
def compute_geds_by_GEDEnv(dataset):
    from gklearn.ged.env import GEDEnv
    import numpy as np

    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    ged_env = GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost(
        'CONSTANT',  # GED cost type.
        edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
    )
    for g in dataset.graphs[0:10]:
        ged_env.add_nx_graph(g, '')
    # ged_env.add_nx_graph(graph1, '')  # add graph1
    # ged_env.add_nx_graph(graph2, '')  # add graph2
    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
    options = {
        'threads': 1  # parallel threads.
    }
    ged_env.set_method(
        'BIPARTITE',  # GED method.
        options  # options for GED method.
    )
    ged_env.init_method()  # initialize GED method.

    ged_mat = np.empty((10, 10))
    for i in range(0, 10):
        for j in range(i, 10):
            ged_env.run_method(i, j)  # run.
            ged_mat[i, j] = ged_env.get_upper_bound(i, j)
            ged_mat[j, i] = ged_mat[i, j]

    results = {}
    results['pi_forward'] = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
    results['pi_backward'] = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
    results['upper_bound'] = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.
    results['runtime'] = ged_env.get_runtime(listID[0], listID[1])
    results['init_time'] = ged_env.get_init_time()
    results['ged_mat'] = ged_mat

    return results
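# A small illustrative driver for compute_geds_by_GEDEnv, not part of the original
# snippet: it assumes a predefined dataset with at least 10 graphs, loaded through
# gklearn's Dataset API as in the snippets above; the dataset name is an example.
if __name__ == '__main__':
    from gklearn.utils import Dataset

    dataset = Dataset()
    dataset.load_predefined_dataset('MUTAG')  # MUTAG contains well over 10 graphs.
    results = compute_geds_by_GEDEnv(dataset)
    print('GED upper bound between the first two graphs:', results['upper_bound'])
    print('10x10 GED matrix:\n', results['ged_mat'])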