Ejemplo n.º 1
0
def compute_geds_cml(graphs,
                     options=None,
                     sort=True,
                     parallel=False,
                     verbose=True):
    """Compute pairwise graph edit distances (GEDs) using label costs.

    Every unordered pair ``(i, j)`` with ``i < j`` of ``graphs`` is processed,
    either serially or with a ``multiprocessing`` pool.

    Parameters
    ----------
    graphs : sequence of networkx graphs
        Graphs to compare. They are added to a fresh ``GEDEnv`` in order.
    options : dict, optional
        GED settings. The keys ``'edit_cost'``, ``'edit_cost_constants'``,
        ``'init_option'`` and ``'method'`` are read unconditionally;
        ``'node_label_costs'`` and ``'edge_label_costs'`` are optional. The
        whole dictionary is also forwarded to ``GEDEnv.set_method``. It is
        copied first, so the caller's dictionary is never modified.
    sort : bool
        In the serial path, when True the graph with fewer nodes is passed
        first to ``_compute_ged`` and the returned maps are swapped back
        accordingly. In the parallel path the flag is forwarded to the worker.
    parallel : bool
        If True, distribute the pairs over ``multiprocessing.cpu_count()``
        worker processes and force ``options['threads']`` to 1.
    verbose : bool
        If True, show a ``tqdm`` progress bar on ``sys.stdout``.

    Returns
    -------
    ged_vec : list
        Distances of the pairs ``(i, j)``, ``i < j``, in
        ``itertools.combinations`` order.
    ged_mat : numpy.ndarray
        Symmetric ``(len(graphs), len(graphs))`` matrix of the same distances;
        the diagonal is left at zero.
    n_edit_operations : list
        Per-pair numbers of edit operations, aligned with ``ged_vec``.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``options``.
    """
    # Work on a private copy: forcing 'threads' below must not leak into the
    # dictionary owned by the caller.
    options = {} if options is None else dict(options)
    n_graphs = len(graphs)

    # Initialize the GED environment.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'],
                          edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()

    node_labels = ged_env.get_all_node_labels()
    edge_labels = ged_env.get_all_edge_labels()
    node_label_costs = None
    if 'node_label_costs' in options:
        node_label_costs = label_costs_to_matrix(options['node_label_costs'],
                                                 len(node_labels))
    edge_label_costs = None
    if 'edge_label_costs' in options:
        edge_label_costs = label_costs_to_matrix(options['edge_label_costs'],
                                                 len(edge_labels))
    ged_env.set_label_costs(node_label_costs, edge_label_costs)
    ged_env.init(init_type=options['init_option'])
    if parallel:
        # Parallelism is done across graph pairs, so each GED run stays
        # single-threaded.
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # Options used to compute the numbers of edit operations.
    neo_options = {
        'edit_cost': options['edit_cost'],
        'is_cml': True,
        'node_labels': node_labels,
        'edge_labels': edge_labels
    }
    ged_mat = np.zeros((n_graphs, n_graphs))

    if parallel:
        n_pairs = n_graphs * (n_graphs - 1) // 2
        # Results arrive out of order, so the output lists are pre-sized and
        # filled by index.
        ged_vec = [0] * n_pairs
        n_edit_operations = [0] * n_pairs
        pairs = combinations(range(n_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if n_pairs < 100 * n_jobs:
            chunksize = n_pairs // n_jobs + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            # Publish the shared inputs as module-level globals of the worker
            # process. NOTE(review): presumably read by
            # _wrapper_compute_ged_parallel; a nested initializer also
            # presumably requires a fork-based start method - confirm.
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        # The context manager terminates the workers if anything below
        # raises; on success the pool is closed and joined as before.
        with Pool(processes=n_jobs,
                  initializer=init_worker,
                  initargs=(graphs, ged_env, listID)) as pool:
            iterator = pool.imap_unordered(do_partial, pairs, chunksize)
            if verbose:
                iterator = tqdm(iterator,
                                desc='computing GEDs',
                                file=sys.stdout)
            for i, j, dis, n_eo_tmp in iterator:
                # Position of pair (i, j), i < j, in combinations order.
                # (i + 1) * (i + 2) is always even, so // is exact.
                idx_itr = n_graphs * i + j - (i + 1) * (i + 2) // 2
                ged_vec[idx_itr] = dis
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_edit_operations[idx_itr] = n_eo_tmp
            pool.close()
            pool.join()

    else:
        ged_vec = []
        n_edit_operations = []
        iterator = range(n_graphs)
        if verbose:
            iterator = tqdm(iterator,
                            desc='computing GEDs',
                            file=sys.stdout)
        for i in iterator:
            for j in range(i + 1, n_graphs):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(
                        graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(
                        ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    # Smaller graph first; swap the returned maps so that
                    # pi_forward still refers to graphs[i] -> graphs[j].
                    dis, pi_backward, pi_forward = _compute_ged(
                        ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j],
                                                  pi_forward, pi_backward,
                                                  **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
# Predefined dataset name, use dataset "MUTAG".
ds_name = 'MUTAG'

# Initialize a Dataset.
dataset = Dataset()
# Load predefined dataset "MUTAG".
dataset.load_predefined_dataset(ds_name)
graph1 = dataset.graphs[0]
graph2 = dataset.graphs[1]
print(graph1, graph2)
"""**2.  Compute graph edit distance.**"""

from gklearn.ged.env import GEDEnv

ged_env = GEDEnv()  # initailize GED environment.
ged_env.set_edit_cost(
    'CONSTANT',  # GED cost type.
    edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
)
ged_env.add_nx_graph(graph1, '')  # add graph1
ged_env.add_nx_graph(graph2, '')  # add graph2
listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
ged_env.init(
    init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
options = {
    'initialization_method': 'RANDOM',  # or 'NODE', etc.
    'threads': 1  # parallel threads.
}
ged_env.set_method(
    'BIPARTITE',  # GED method.
Ejemplo n.º 3
0
def test_GEDEnv():
    """Smoke-test GEDEnv on the first two graphs of the "MUTAG" dataset.

    Runs the 'BIPARTITE' method with 'CONSTANT' edit costs and checks that the
    forward and backward node maps each have one entry per node of the
    corresponding graph.

    Raises
    ------
    AssertionError
        If a node map has an unexpected length, or if any GED step raises (the
        original exception is chained as the cause).
    """
    # 1. Get dataset.
    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'

    dataset = Dataset()
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    # 2. Compute graph edit distance.
    try:
        from gklearn.ged.env import GEDEnv

        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost(
            'CONSTANT',  # GED cost type.
            edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
        )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')
        options = {
            'initialization_method': 'RANDOM',  # or 'NODE', etc.
            'threads': 1  # parallel threads.
        }
        ged_env.set_method(
            'BIPARTITE',  # GED method.
            options  # options for GED method.
        )
        ged_env.init_method()  # initialize GED method.

        ged_env.run_method(listID[0], listID[1])  # run.

        pi_forward = ged_env.get_forward_map(listID[0], listID[1])
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])
        # Only checks that the upper bound can be retrieved; the value itself
        # is not asserted on.
        ged_env.get_upper_bound(listID[0], listID[1])

        import networkx as nx
        # Two separate assertions: in `assert a, b` the second expression is
        # only the failure message and would never be evaluated as a check.
        assert len(pi_forward) == nx.number_of_nodes(graph1), \
            'forward map length differs from the node count of graph1'
        assert len(pi_backward) == nx.number_of_nodes(graph2), \
            'backward map length differs from the node count of graph2'

    except Exception as exception:
        # Explicit raise (rather than `assert False`) so the failure survives
        # `python -O` and keeps the original traceback as its cause.
        raise AssertionError(exception) from exception
Ejemplo n.º 4
0
	def __gmg_bcu(self):
		"""
		Estimate a generalized median graph (GMG) by local search based on block coordinate update (BCU).

		The sums of distances and the set/generalized median graphs produced by the
		estimator are stored on the instance.

		Returns
		-------
		None.

		"""
		# Build the GED environment and fill it with the cleaned dataset graphs.
		env = GEDEnv() # @todo: maybe create a ged_env as a private variable.
		env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants)
		cleaned_graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
		for cleaned in cleaned_graphs:
			env.add_nx_graph(cleaned, '')
		graph_ids = env.get_all_graph_ids()
		# Two extra graph slots that will hold the resulting medians.
		set_median_id = env.add_graph('set_median')
		gen_median_id = env.add_graph('gen_median')
		env.init(init_type=self.__ged_options['init_option'])

		# Create the median graph estimator on top of that environment.
		estimator = MedianGraphEstimatorPy(env, constant_node_costs(self.__ged_options['edit_cost']))
		self.__mge = estimator
		estimator.set_refine_method(self.__ged_options['method'], self.__ged_options)

		# Estimator options: default the seed to the current time in milliseconds.
		mge_opts = self.__mge_options.copy()
		if 'seed' not in mge_opts:
			mge_opts['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
		mge_opts['parallel'] = self.__parallel
		estimator.set_options(mge_options_to_string(mge_opts))
		estimator.set_label_names(
			node_labels=self._dataset.node_labels,
			edge_labels=self._dataset.edge_labels,
			node_attrs=self._dataset.node_attrs,
			edge_attrs=self._dataset.edge_attrs)

		# GED options for the init/descent methods; a copy is used so that forcing
		# a single thread in parallel mode leaves the stored options untouched.
		method_opts = self.__ged_options.copy()
		if self.__parallel:
			method_opts['threads'] = 1
		method_name = method_opts['method']
		estimator.set_init_method(method_name, method_opts)
		estimator.set_descent_method(method_name, method_opts)

		# Run the estimator.
		estimator.run(graph_ids, set_median_id, gen_median_id)

		# Collect the sums of distances (SODs).
		self.__sod_set_median = estimator.get_sum_of_distances('initialized')
		self.__sod_gen_median = estimator.get_sum_of_distances('converged')

		# Collect the median graphs.
		self.__set_median = env.get_nx_graph(set_median_id)
		self.__gen_median = env.get_nx_graph(gen_median_id)
Ejemplo n.º 5
0
def _compute_ged(dataset, node_label_costs, edge_label_costs):
	"""Compute the GED between the first two graphs of ``dataset`` with label costs.

	All graphs of ``dataset`` are added to a fresh ``GEDEnv`` configured with
	'CONSTANT' edit costs and the 'BIPARTITE' method; only the pair made of the
	first two graph ids is actually run.

	Returns the tuple ``(pi_forward, pi_backward, dis, node_labels, edge_labels)``,
	where each map entry has been replaced by the matching node of the other
	graph, or by ``np.inf`` when the entry is not a valid node index.

	NOTE(review): ``np`` is not imported here - presumably available at module
	level; confirm.
	"""
	from gklearn.ged.env import GEDEnv
	from gklearn.ged.util.util import label_costs_to_matrix
	import networkx as nx

	def _relabel(node_map, target_nodes, n_targets):
		# Translate positional indices into node names; anything out of range
		# marks a removal and is reported as np.inf.
		relabelled = []
		for pi in node_map:
			relabelled.append(target_nodes[pi] if pi < n_targets else np.inf)
		return relabelled

	# Set up the GED environment with constant edit costs.
	env = GEDEnv()
	env.set_edit_cost('CONSTANT', edit_cost_constants=[3, 3, 1, 3, 3, 1])
	for graph in dataset.graphs:
		env.add_nx_graph(graph, '')

	node_labels = env.get_all_node_labels()
	edge_labels = env.get_all_edge_labels()
	graph_ids = env.get_all_graph_ids()
	node_cost_matrix = label_costs_to_matrix(node_label_costs, len(node_labels))
	edge_cost_matrix = label_costs_to_matrix(edge_label_costs, len(edge_labels))
	env.set_label_costs(node_cost_matrix, edge_cost_matrix)
	env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')

	# Configure and initialize the GED method.
	method_options = {
		'initialization_method': 'RANDOM',  # or 'NODE', etc.
		'threads': 1,
	}
	env.set_method('BIPARTITE', method_options)
	env.init_method()

	# Run on the first pair and fetch maps and distance.
	first_id, second_id = graph_ids[0], graph_ids[1]
	env.run_method(first_id, second_id)
	raw_forward = env.get_forward_map(first_id, second_id)
	raw_backward = env.get_backward_map(first_id, second_id)
	dis = env.get_upper_bound(first_id, second_id)

	# Make the map labels correct (removals are labelled np.inf).
	first_graph = dataset.graphs[0]
	second_graph = dataset.graphs[1]
	nodes1 = list(first_graph.nodes())
	nodes2 = list(second_graph.nodes())
	nb1 = nx.number_of_nodes(first_graph)
	nb2 = nx.number_of_nodes(second_graph)
	pi_forward = _relabel(raw_forward, nodes2, nb2)
	pi_backward = _relabel(raw_backward, nodes1, nb1)

	return pi_forward, pi_backward, dis, node_labels, edge_labels
Ejemplo n.º 6
0
def compute_geds_by_GEDEnv(dataset):
    """Compute pairwise GEDs among (up to) the first 10 graphs of ``dataset``.

    The graphs are added to a fresh ``GEDEnv`` using 'CONSTANT' edit costs and
    the 'BIPARTITE' method. The distance matrix is sized from the number of
    graphs actually added, so datasets with fewer than 10 graphs are handled.

    Parameters
    ----------
    dataset
        Object exposing a sliceable ``graphs`` attribute of networkx graphs.

    Returns
    -------
    dict
        ``'ged_mat'``: symmetric matrix of upper bounds for all pairs
        (including each graph with itself); ``'pi_forward'``,
        ``'pi_backward'``, ``'upper_bound'`` and ``'runtime'`` for the pair of
        the first two graphs; ``'init_time'`` of the environment.

    Raises
    ------
    IndexError
        Presumably, when fewer than two graphs are available, since the
        per-pair results index the first two graph ids.
    """
    from gklearn.ged.env import GEDEnv
    import numpy as np

    ged_env = GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost(
        'CONSTANT',  # GED cost type.
        edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
    )
    for g in dataset.graphs[0:10]:
        ged_env.add_nx_graph(g, '')

    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')
    options = {
        'threads': 1  # parallel threads.
    }
    ged_env.set_method(
        'BIPARTITE',  # GED method.
        options  # options for GED method.
    )
    ged_env.init_method()  # initialize GED method.

    # Size everything from the ids the environment reports instead of a
    # hard-coded 10, and address graphs through those ids.
    n_graphs = len(listID)
    ged_mat = np.empty((n_graphs, n_graphs))
    for i in range(n_graphs):
        for j in range(i, n_graphs):
            ged_env.run_method(listID[i], listID[j])  # run.
            ged_mat[i, j] = ged_env.get_upper_bound(listID[i], listID[j])
            ged_mat[j, i] = ged_mat[i, j]

    first_id, second_id = listID[0], listID[1]
    results = {
        'pi_forward': ged_env.get_forward_map(first_id, second_id),
        'pi_backward': ged_env.get_backward_map(first_id, second_id),
        # GED between the first two graphs.
        'upper_bound': ged_env.get_upper_bound(first_id, second_id),
        'runtime': ged_env.get_runtime(first_id, second_id),
        'init_time': ged_env.get_init_time(),
        'ged_mat': ged_mat,
    }

    return results