Example #1
def compute_ged(g1, g2, options):
    import numpy as np
    import networkx as nx
    from gklearn.gedlib import librariesImport, gedlibpy
    from gklearn.ged.util import ged_options_to_string

    ged_env = gedlibpy.GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'],
                          edit_cost_constant=options['edit_cost_constants'])
    ged_env.add_nx_graph(g1, '')
    ged_env.add_nx_graph(g2, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init(init_option=options['init_option'])
    ged_env.set_method(options['method'], ged_options_to_string(options))
    ged_env.init_method()

    g = listID[0]
    h = listID[1]
    ged_env.run_method(g, h)
    pi_forward = ged_env.get_forward_map(g, h)
    pi_backward = ged_env.get_backward_map(g, h)
    upper = ged_env.get_upper_bound(g, h)
    dis = upper

    # Map node indices in the returned assignments back to the original node
    # labels of g1 and g2 (nodes removed by the edit path are mapped to np.inf).
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward
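A minimal usage sketch for compute_ged, assuming two toy networkx graphs; the option values below are only illustrative (they mirror the constants used in the other examples on this page) and must match what the chosen GED method expects:

import networkx as nx

g1 = nx.path_graph(3)
g2 = nx.cycle_graph(3)
options = {
    'edit_cost': 'CONSTANT',  # edit cost model.
    'edit_cost_constants': [3, 3, 1, 3, 3, 1],  # six CONSTANT cost values, as in Example #3.
    'method': 'BIPARTITE',  # GED heuristic.
    'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',  # environment initialization.
}
dis, pi_forward, pi_backward = compute_ged(g1, g2, options)
print(dis, pi_forward, pi_backward)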
Example #2
	def __gmg_bcu(self):
		"""
		Estimate a generalized median graph (GMG) by local search based on block coordinate update (BCU).

		Returns
		-------
		None.

		"""
		# Set up the ged environment.
		ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private variable.
		# gedlibpy.restart_env()
		ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constant=self.__edit_cost_constants)
		graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
		for g in graphs:
			ged_env.add_nx_graph(g, '')
		graph_ids = ged_env.get_all_graph_ids()
		set_median_id = ged_env.add_graph('set_median')
		gen_median_id = ged_env.add_graph('gen_median')
		ged_env.init(init_option=self.__ged_options['init_option'])
		
		# Set up the median graph estimator.
		self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
		self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
		options = self.__mge_options.copy()
		if 'seed' not in options:
			options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
		options['parallel'] = self.__parallel
		
		# Select the GED algorithm.
		self.__mge.set_options(mge_options_to_string(options))
		self.__mge.set_label_names(node_labels=self._dataset.node_labels, 
					  edge_labels=self._dataset.edge_labels, 
					  node_attrs=self._dataset.node_attrs, 
					  edge_attrs=self._dataset.edge_attrs)
		ged_options = self.__ged_options.copy()
		if self.__parallel:
			ged_options['threads'] = 1
		self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options))
		self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options))
		
		# Run the estimator.
		self.__mge.run(graph_ids, set_median_id, gen_median_id)
		
		# Get SODs.
		self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
		self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')
		
		# Get median graphs.
		self.__set_median = ged_env.get_nx_graph(set_median_id)
		self.__gen_median = ged_env.get_nx_graph(gen_median_id)
Example #3
def compute_geds_by_GEDLIB(dataset):
    from gklearn.gedlib import librariesImport, gedlibpy
    from gklearn.ged.util import ged_options_to_string
    import numpy as np

    graph1 = dataset.graphs[5]
    graph2 = dataset.graphs[6]

    ged_env = gedlibpy.GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost(
        'CONSTANT',  # GED cost type.
        edit_cost_constant=[3, 3, 1, 3, 3, 1]  # edit costs.
    )
    # 	ged_env.add_nx_graph(graph1, '') # add graph1
    # 	ged_env.add_nx_graph(graph2, '') # add graph2
    for g in dataset.graphs[0:10]:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.init(init_option='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
    options = {
        'initialization-method': 'RANDOM',  # or 'NODE', etc.
        'threads': 1  # parallel threads.
    }
    ged_env.set_method(
        'BIPARTITE',  # GED method.
        ged_options_to_string(options)  # options for GED method.
    )
    ged_env.init_method()  # initialize GED method.

    ged_mat = np.empty((10, 10))
    for i in range(0, 10):
        for j in range(i, 10):
            ged_env.run_method(i, j)  # run.
            ged_mat[i, j] = ged_env.get_upper_bound(i, j)
            ged_mat[j, i] = ged_mat[i, j]

    results = {}
    results['pi_forward'] = ged_env.get_forward_map(listID[0],
                                                    listID[1])  # forward map.
    results['pi_backward'] = ged_env.get_backward_map(
        listID[0], listID[1])  # backward map.
    results['upper_bound'] = ged_env.get_upper_bound(
        listID[0], listID[1])  # GED between two graphs.
    results['runtime'] = ged_env.get_runtime(listID[0], listID[1])
    results['init_time'] = ged_env.get_init_time()
    results['ged_mat'] = ged_mat

    return results
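A minimal sketch of calling compute_geds_by_GEDLIB; it loads MUTAG the same way as the test at the bottom of this page and wraps the graph list in a hypothetical stand-in object, since the function only needs a .graphs attribute holding at least ten networkx graphs:

from types import SimpleNamespace
from gklearn.utils import load_dataset

# Load MUTAG (same relative path as in the test function below) and wrap the
# graph list so it exposes the .graphs attribute the function reads.
Gn, y_all, label_names = load_dataset('../../../datasets/MUTAG/MUTAG_A.txt')
dataset = SimpleNamespace(graphs=Gn)

results = compute_geds_by_GEDLIB(dataset)
print(results['upper_bound'])  # GED upper bound between graphs 5 and 6.
print(results['ged_mat'])      # 10 x 10 matrix of pairwise upper bounds.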
Example #4
def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True):
    # Besides the imports below, this function relies on helpers defined
    # alongside it (_compute_ged, _wrapper_compute_ged_parallel,
    # get_nb_edit_operations).
    import sys
    import multiprocessing
    from multiprocessing import Pool
    from functools import partial
    from itertools import combinations
    import numpy as np
    import networkx as nx
    from tqdm import tqdm
    from gklearn.gedlib import librariesImport, gedlibpy
    from gklearn.ged.util import ged_options_to_string

    # initialize ged env.
    ged_env = gedlibpy.GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'],
                          edit_cost_constant=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init()
    if parallel:
        options['threads'] = 1
    ged_env.set_method(options['method'], ged_options_to_string(options))
    ged_env.init_method()

    # compute ged.
    neo_options = {
        'edit_cost': options['edit_cost'],
        'node_labels': options['node_labels'],
        'edge_labels': options['edge_labels'],
        'node_attrs': options['node_attrs'],
        'edge_attrs': options['edge_attrs']
    }
    ged_mat = np.zeros((len(graphs), len(graphs)))
    if parallel:
        len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
        ged_vec = [0 for i in range(len_itr)]
        n_edit_operations = [0 for i in range(len_itr)]
        itr = combinations(range(0, len(graphs)), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = int(len_itr / n_jobs) + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs,
                    initializer=init_worker,
                    initargs=(graphs, ged_env, listID))
        if verbose:
            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                            desc='computing GEDs',
                            file=sys.stdout)
        else:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
        for i, j, dis, n_eo_tmp in iterator:
            idx_itr = int(len(graphs) * i + j - (i + 1) * (i + 2) / 2)
            ged_vec[idx_itr] = dis
            ged_mat[i][j] = dis
            ged_mat[j][i] = dis
            n_edit_operations[idx_itr] = n_eo_tmp

        pool.close()
        pool.join()

    else:
        ged_vec = []
        n_edit_operations = []
        if verbose:
            iterator = tqdm(range(len(graphs)),
                            desc='computing GEDs',
                            file=sys.stdout)
        else:
            iterator = range(len(graphs))
        for i in iterator:
            for j in range(i + 1, len(graphs)):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(
                        graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(
                        ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    dis, pi_backward, pi_forward = _compute_ged(
                        ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j],
                                                  pi_forward, pi_backward,
                                                  **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
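A minimal usage sketch for compute_geds; every key in the options dict below is read somewhere in the function body above, but the concrete values (cost model, method, label names) are only illustrative and assume graphs and label names loaded as in the test function that follows:

graphs = Gn_i  # e.g. the first ten MUTAG graphs, loaded as in the test below.
options = {
    'edit_cost': 'CONSTANT',
    'edit_cost_constants': [3, 3, 1, 3, 3, 1],
    'method': 'BIPARTITE',
    'node_labels': label_names['node_labels'],
    'edge_labels': label_names['edge_labels'],
    'node_attrs': label_names['node_attrs'],
    'edge_attrs': label_names['edge_attrs'],
}
ged_vec, ged_mat, n_edit_operations = compute_geds(
    graphs, options=options, sort=True, parallel=False, verbose=True)
print(ged_mat)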
def test_median_graph_estimator_symb():
	from gklearn.utils import load_dataset
	from gklearn.ged.median import MedianGraphEstimator, constant_node_costs
	from gklearn.gedlib import librariesImport, gedlibpy
	from gklearn.preimage.utils import get_same_item_indices
	import multiprocessing

	# estimator parameters.
	init_type = 'MEDOID'
	num_inits = 1
	threads = multiprocessing.cpu_count()
	time_limit = 60000
	
	# algorithm parameters.
	algo = 'IPFP'
	initial_solutions = 1
	algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE '

	edit_cost_name = 'CONSTANT'
	edit_cost_constants = [4, 4, 2, 1, 1, 1]
	ds_name = 'MUTAG'
	
	# Load dataset.
	dataset = '../../../datasets/MUTAG/MUTAG_A.txt'
	Gn, y_all, label_names = load_dataset(dataset)
	y_idx = get_same_item_indices(y_all)
	# Keep only the graphs of the first class, then take the first 10 of them.
	for y, values in y_idx.items():
		Gn_i = [Gn[val] for val in values]
		break
	Gn_i = Gn_i[0:10]
	
	# Set up the environment.
	ged_env = gedlibpy.GEDEnv()
	# gedlibpy.restart_env()
	ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
	for G in Gn_i:
		ged_env.add_nx_graph(G, '')
	graph_ids = ged_env.get_all_graph_ids()
	set_median_id = ged_env.add_graph('set_median')
	gen_median_id = ged_env.add_graph('gen_median')
	ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
	
	# Set up the estimator.
	mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name))
	mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
	
	mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
	mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1'  + ' --update-order TRUE --refine FALSE --randomness PSEUDO --parallel TRUE '# @todo: std::to_string(rng())
	
	# Select the GED algorithm.
	algo_options = '--threads ' + str(threads) + algo_options_suffix
	mge.set_options(mge_options)
	mge.set_label_names(node_labels=label_names['node_labels'],
					  edge_labels=label_names['edge_labels'], 
					  node_attrs=label_names['node_attrs'], 
					  edge_attrs=label_names['edge_attrs'])
	mge.set_init_method(algo, algo_options)
	mge.set_descent_method(algo, algo_options)
	
	# Run the estimator.
	mge.run(graph_ids, set_median_id, gen_median_id)
	
	# Get SODs.
	sod_sm = mge.get_sum_of_distances('initialized')
	sod_gm = mge.get_sum_of_distances('converged')
	print('sod_sm, sod_gm: ', sod_sm, sod_gm)
	
	# Get median graphs.
	set_median = ged_env.get_nx_graph(set_median_id)
	gen_median = ged_env.get_nx_graph(gen_median_id)
	
	return set_median, gen_median
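A minimal driver sketch for the test above, assuming the relative MUTAG path resolves from the current working directory:

if __name__ == '__main__':
	set_median, gen_median = test_median_graph_estimator_symb()
	print('set median nodes:', set_median.nodes(data=True))
	print('gen median nodes:', gen_median.nodes(data=True))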