예제 #1
0
	def _gmg_bcu(self):
		"""
		The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).

		Returns
		-------
		None.

		"""
		# Set up the ged environment.
		ged_env = GEDEnv() # @todo: maybe create a ged_env as a private varible.
		# gedlibpy.restart_env()
		ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants)
		graphs = [self._clean_graph(g) for g in self._dataset.graphs]
		for g in graphs:
			ged_env.add_nx_graph(g, '')
		graph_ids = ged_env.get_all_graph_ids()
	
		node_labels = ged_env.get_all_node_labels()
		edge_labels =  ged_env.get_all_edge_labels()
		node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels))
		edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels))
		ged_env.set_label_costs(node_label_costs, edge_label_costs)
	
		set_median_id = ged_env.add_graph('set_median')
		gen_median_id = ged_env.add_graph('gen_median')
		ged_env.init(init_type=self._ged_options['init_option'])
		
		# Set up the madian graph estimator.
		self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost']))
		self._mge.set_refine_method(self._ged_options['method'], self._ged_options)
		options = self._mge_options.copy()
		if not 'seed' in options:
			options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
		options['parallel'] = self._parallel
		
		# Select the GED algorithm.
		self._mge.set_options(mge_options_to_string(options))
		self._mge.set_label_names(node_labels=self._dataset.node_labels, 
					  edge_labels=self._dataset.edge_labels, 
					  node_attrs=self._dataset.node_attrs, 
					  edge_attrs=self._dataset.edge_attrs)
		ged_options = self._ged_options.copy()
		if self._parallel:
			ged_options['threads'] = 1
		self._mge.set_init_method(ged_options['method'], ged_options)
		self._mge.set_descent_method(ged_options['method'], ged_options)
		
		# Run the estimator.
		self._mge.run(graph_ids, set_median_id, gen_median_id)
		
		# Get SODs.
		self._sod_set_median = self._mge.get_sum_of_distances('initialized')
		self._sod_gen_median = self._mge.get_sum_of_distances('converged')
		
		# Get median graphs.
		self._set_median = ged_env.get_nx_graph(set_median_id)
		self._gen_median = ged_env.get_nx_graph(gen_median_id)
def test_median_graph_estimator_symb():
	from gklearn.utils import load_dataset
	from gklearn.ged.median import MedianGraphEstimator, constant_node_costs
	from gklearn.gedlib import librariesImport, gedlibpy
	from gklearn.preimage.utils import get_same_item_indices
	import multiprocessing

	# estimator parameters.
	init_type = 'MEDOID'
	num_inits = 1
	threads = multiprocessing.cpu_count()
	time_limit = 60000
	
	# algorithm parameters.
	algo = 'IPFP'
	initial_solutions = 1
	algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE '

	edit_cost_name = 'CONSTANT'
	edit_cost_constants = [4, 4, 2, 1, 1, 1]
	ds_name = 'MUTAG'
	
	# Load dataset.
	dataset = '../../../datasets/MUTAG/MUTAG_A.txt'
	Gn, y_all, label_names = load_dataset(dataset)
	y_idx = get_same_item_indices(y_all)
	for i, (y, values) in enumerate(y_idx.items()):
		Gn_i = [Gn[val] for val in values]
		break
	Gn_i = Gn_i[0:10]
	
	# Set up the environment.
	ged_env = gedlibpy.GEDEnv()
	# gedlibpy.restart_env()
	ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
	for G in Gn_i:
		ged_env.add_nx_graph(G, '')
	graph_ids = ged_env.get_all_graph_ids()
	set_median_id = ged_env.add_graph('set_median')
	gen_median_id = ged_env.add_graph('gen_median')
	ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
	
	# Set up the estimator.
	mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name))
	mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
	
	mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
	mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1'  + ' --update-order TRUE --refine FALSE --randomness PSEUDO --parallel TRUE '# @todo: std::to_string(rng())
	
	# Select the GED algorithm.
	algo_options = '--threads ' + str(threads) + algo_options_suffix
	mge.set_options(mge_options)
	mge.set_label_names(node_labels=label_names['node_labels'],
					  edge_labels=label_names['edge_labels'], 
					  node_attrs=label_names['node_attrs'], 
					  edge_attrs=label_names['edge_attrs'])
	mge.set_init_method(algo, algo_options)
	mge.set_descent_method(algo, algo_options)
	
	# Run the estimator.
	mge.run(graph_ids, set_median_id, gen_median_id)
	
	# Get SODs.
	sod_sm = mge.get_sum_of_distances('initialized')
	sod_gm = mge.get_sum_of_distances('converged')
	print('sod_sm, sod_gm: ', sod_sm, sod_gm)
	
	# Get median graphs.
	set_median = ged_env.get_nx_graph(set_median_id)
	gen_median = ged_env.get_nx_graph(gen_median_id)
	
	return set_median, gen_median