def _compute_kernel_list_series(self, g1, g_list):
		self._all_graphs_have_edges([g1] + g_list)
		# get shortest path graphs of g1 and each graph in g_list.
		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
		iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
		g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

		# compute kernel list.
		kernel_list = [None] * len(g_list)
		iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
		for i in iterator:
			kernel = self._sp_do(g1, g_list[i])
			kernel_list[i] = kernel

		return kernel_list
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute kernels between ``g1`` and each graph in ``g_list`` in
		parallel, using a multiprocessing pool with ``imap_unordered``.

		Returns a list with one kernel value per graph in ``g_list``.
		"""
		self._all_graphs_have_edges([g1] + g_list)
		# get shortest path graphs of g1 and each graph in g_list.
		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
		pool = Pool(self._n_jobs)
		get_sp_graphs_fun = self._wrapper_get_sp_graphs
		# Each work item carries its original index so out-of-order results
		# can be written back to the right slot.
		itr = zip(g_list, range(0, len(g_list)))
		# Heuristic chunk size: split evenly for small inputs, cap at 100.
		if len(g_list) < 100 * self._n_jobs:
			chunksize = int(len(g_list) / self._n_jobs) + 1
		else:
			chunksize = 100
		iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize),
						desc='getting sp graphs', file=sys.stdout,
						length=len(g_list), verbose=(self._verbose >= 2))
		# Results arrive in arbitrary order; place each by its index.
		for i, g in iterator:
			g_list[i] = g
		pool.close()
		pool.join()

		# compute Gram matrix.
		# NOTE(review): despite the comment above, this builds a kernel list,
		# not a Gram matrix.
		kernel_list = [None] * len(g_list)

		# Worker initializer: expose g1 and the graph list to workers as
		# module-level globals (shared once per worker process).
		def init_worker(g1_toshare, gl_toshare):
			global G_g1, G_gl
			G_g1 = g1_toshare
			G_gl = gl_toshare
		do_fun = self._wrapper_kernel_list_do
		# result is an (index, kernel) pair; write it into the output list.
		def func_assign(result, var_to_assign):
			var_to_assign[result[0]] = result[1]
		itr = range(len(g_list))
		len_itr = len(g_list)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
			init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

		return kernel_list
# Example #3
# 0
    def _compute_gm_series(self):
        """Serially compute the Gram matrix over shortest-path graphs."""
        # Replace each stored graph with its shortest-path transform.
        if self._verbose >= 2:
            graphs_iter = tqdm(self._graphs,
                               desc='getting sp graphs',
                               file=sys.stdout)
        else:
            graphs_iter = self._graphs
        self._graphs = [
            getSPGraph(g, edge_weight=self.__edge_weight) for g in graphs_iter
        ]

        n = len(self._graphs)
        gram_matrix = np.zeros((n, n))

        # Only the upper triangle (diagonal included) is computed; each
        # value is mirrored into the lower triangle.
        from itertools import combinations_with_replacement
        pairs = combinations_with_replacement(range(0, n), 2)
        if self._verbose >= 2:
            pairs = tqdm(pairs, desc='calculating kernels', file=sys.stdout)
        for i, j in pairs:
            value = self.__sp_do(self._graphs[i], self._graphs[j])
            gram_matrix[i][j] = gram_matrix[j][i] = value

        return gram_matrix
# Example #4
# 0
    def _compute_kernel_list_series(self, g1, g_list):
        """Serially compute the kernel between ``g1`` and each graph in
        ``g_list``, returning one kernel value per graph.
        """
        # Shortest-path transform of the reference graph and of the list.
        g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
        if self._verbose >= 2:
            src = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
        else:
            src = g_list
        sp_list = [
            getSPGraph(g, edge_weight=self.__edge_weight) for g in src
        ]

        # One kernel value per transformed graph.
        indices = range(len(sp_list))
        if self._verbose >= 2:
            indices = tqdm(indices,
                           desc='calculating kernels',
                           file=sys.stdout)
        kernel_list = [None] * len(sp_list)
        for idx in indices:
            kernel_list[idx] = self.__sp_do(g1, sp_list[idx])

        return kernel_list
	def _compute_gm_series(self):
		"""Serially compute the full Gram matrix of shortest-path kernels."""
		self._all_graphs_have_edges(self._graphs)
		# Shortest-path transform of every input graph.
		sp_iter = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in sp_iter]

		n = len(self._graphs)
		gram_matrix = np.zeros((n, n))

		# Upper triangle (diagonal included); mirror into the lower half.
		from itertools import combinations_with_replacement
		pairs = combinations_with_replacement(range(0, n), 2)
		n_pairs = int(n * (n + 1) / 2)
		pair_iter = get_iters(pairs, desc='Computing kernels',
					length=n_pairs, file=sys.stdout, verbose=(self._verbose >= 2))
		for i, j in pair_iter:
			value = self._sp_do(self._graphs[i], self._graphs[j])
			gram_matrix[i][j] = value
			gram_matrix[j][i] = value

		return gram_matrix
# Example #6
# 0
	def _compute_gm_series(self):
		"""Walk all graph pairs serially, recording comparison statistics
		with periodic checkpointing so an interrupted run can resume from
		the last saved (i, j) position.
		"""
		self._all_graphs_have_edges(self._graphs)
		# get shortest path graph of each graph.
		iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]


		# Load saved progress/statistics from a previous run (resume support).
		results = load_results(self._file_name, self._fcsp)

		# compute Gram matrix.
		# NOTE(review): gram_matrix is never written below; this variant
		# collects statistics via _sp_do_space and returns an all-zero matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		from itertools import combinations_with_replacement
		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
		iterator = get_iters(itr, desc='Computing kernels',
					length=len_itr, file=sys.stdout,verbose=(self._verbose >= 2))

		time0 = time.time()
		for i, j in iterator:
			# Skip pairs already handled before the last checkpoint.
			if i > results['i'] or (i == results['i'] and j > results['j']):
				data = self._sp_do_space(self._graphs[i], self._graphs[j])
				if self._fcsp:
					# data[0]: number of comparisons; data[1]: vertex-kernel dict.
					results['nb_comparison'].append(data[0])
					if data[1] != {}:
						results['vk_dict_mem'].append(estimate_vk_memory(data[1],
								    nx.number_of_nodes(self._graphs[i]),
									nx.number_of_nodes(self._graphs[j])))
				else:
					results['nb_comparison'].append(data)
				results['i'] = i
				results['j'] = j

				# Checkpoint to disk at most every 600 seconds.
				time1 = time.time()
				if time1 - time0 > 600:
					save_results(self._file_name, results)
					time0 = time1

		compute_stats(self._file_name, results)

		return gram_matrix
# Example #7
# 0
def wrapper_getSPGraph(weight, itr_item):
    """Unpack a (graph, index) pair and return (index, shortest-path graph)."""
    g, i = itr_item
    return i, getSPGraph(g, edge_weight=weight)
	def _wrapper_get_sp_graphs(self, itr_item):
		"""Pool worker: unpack (graph, index), return (index, sp-graph)."""
		g, i = itr_item
		return i, getSPGraph(g, edge_weight=self._edge_weight)
	def _compute_single_kernel_series(self, g1, g2):
		"""Compute the shortest-path kernel between a single pair of graphs."""
		self._all_graphs_have_edges([g1, g2])
		sp1 = getSPGraph(g1, edge_weight=self._edge_weight)
		sp2 = getSPGraph(g2, edge_weight=self._edge_weight)
		return self._sp_do(sp1, sp2)
# Example #10
# 0
	def _wl_spkernel_do(Gn, node_label, edge_label, height):
		"""Compute Weisfeiler-Lehman shortest path kernels between graphs.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.
		node_label : string
			node attribute used as label.
		edge_label : string
			edge attribute used as label.
		height : int
			subtree height (number of WL relabeling iterations).

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman
			kernel between 2 graphs.
		"""
		# Fixes vs. the original: removed a stray dead `pass`; replaced the
		# deprecated `G.node[...]` accessor (removed in NetworkX 2.4) with
		# `G.nodes[...]`; keyed node multisets by node id instead of indexing
		# a list by node id, which was only correct for graphs whose nodes
		# are exactly 0..n-1 in iteration order.
		from gklearn.utils.utils import getSPGraph

		# init.
		height = int(height)
		gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel

		# Get shortest path graphs of Gn.
		Gn = [getSPGraph(G, edge_weight=edge_label) for G in Gn]

		def _add_edge_matches(i, j):
			# Count pairs of edges (one from each graph) with equal non-zero
			# 'cost' and the same unordered endpoint pair; add the count to
			# gram_matrix[i][j] and mirror it to [j][i].
			for e1 in Gn[i].edges(data=True):
				for e2 in Gn[j].edges(data=True):
					if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
						gram_matrix[i][j] += 1
			gram_matrix[j][i] = gram_matrix[i][j]

		# Initial contribution for height = 0.
		for i in range(0, len(Gn)):
			for j in range(i, len(Gn)):
				_add_edge_matches(i, j)

		# Iterate each height.
		for h in range(1, height + 1):
			all_set_compressed = {}  # maps original labels to compressed ones across all graphs this iteration
			num_of_labels_occured = 0  # number of distinct labels seen so far
			for G in Gn:  # for each graph
				# Multiset-label determination, keyed by node id.
				multisets = {}
				for node, attrs in G.nodes(data=True):
					neighbor_labels = sorted(G.nodes[nb][node_label] for nb in G[node])
					# Concatenate own label with sorted neighbor labels.
					multisets[node] = attrs[node_label] + ''.join(neighbor_labels)

				# Label compression: if a label occurred before, reuse its
				# compressed label, else assign the next integer (as string).
				set_compressed = {}
				for value in set(multisets.values()):
					if value in all_set_compressed:
						set_compressed[value] = all_set_compressed[value]
					else:
						num_of_labels_occured += 1
						set_compressed[value] = str(num_of_labels_occured)

				all_set_compressed.update(set_compressed)

				# Relabel nodes with the compressed labels.
				for node, attrs in G.nodes(data=True):
					attrs[node_label] = set_compressed[multisets[node]]

			# Compute subtree kernel with h iterations and add it to the final kernel.
			for i in range(0, len(Gn)):
				for j in range(i, len(Gn)):
					_add_edge_matches(i, j)

		return gram_matrix
# Example #11
# 0
 def _compute_single_kernel_series(self, g1, g2):
     """Compute the shortest-path kernel between two graphs."""
     sp_g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
     sp_g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
     return self.__sp_do(sp_g1, sp_g2)