def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

		The inputs are passed to ``_check_graphs`` and ``_add_dummy_labels``.
		When ``self._ds_infos['directed']`` is falsy, ``g1`` and the graphs of
		``g_list`` are replaced by the results of their ``to_directed()`` calls.

		Parameters
		----------
		g1 : graph
			The graph every element of ``g_list`` is compared with.
		g_list : list
			The graphs to compare with ``g1``.

		Returns
		-------
		list
			A list with one slot per graph of ``g_list``. Slot ``i`` is written
			by the assignment callback from the ``(i, value)`` pairs it receives;
			slots never written stay ``None``.

		Raises
		------
		ValueError
			If ``self._compute_method`` is neither ``'exp'`` nor ``'geo'``.
		"""
		self._check_graphs(g_list + [g1])
		self._add_dummy_labels(g_list + [g1])
		if not self._ds_infos['directed']:
			# convert undirected graphs to directed ones.
			g1 = g1.to_directed()
			g_list = [G.to_directed() for G in g_list]

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		# direct product graph method - exponential
		if self._compute_method == 'exp':
			do_fun = self._wrapper_kernel_list_do_exp
		# direct product graph method - geometric
		elif self._compute_method == 'geo':
			do_fun = self._wrapper_kernel_list_do_geo
		else:
			# fail explicitly instead of hitting an unbound ``do_fun`` below.
			raise ValueError(
				'Unsupported compute method: %r. Expected "exp" or "geo".'
				% (self._compute_method,))

		def func_assign(result, var_to_assign):
			# each result is an (index, value) pair.
			var_to_assign[result[0]] = result[1]

		itr = range(len(g_list))
		len_itr = len(g_list)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
			init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered',
			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

		return kernel_list
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

		``g1`` is converted with ``getSPGraph``; the graphs of ``g_list`` are
		converted by ``self._wrapper_get_sp_graphs`` in a process pool. The
		converted graphs are collected in a new list, so the list passed by the
		caller is not overwritten.

		Parameters
		----------
		g1 : graph
			The graph every element of ``g_list`` is compared with.
		g_list : list
			The graphs to compare with ``g1``.

		Returns
		-------
		list
			A list with one slot per graph of ``g_list``. Slot ``i`` is written
			by the assignment callback from the ``(i, value)`` pairs it receives;
			slots never written stay ``None``.
		"""
		self._all_graphs_have_edges([g1] + g_list)
		# get shortest path graphs of g1 and each graph in g_list.
		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
		n_graphs = len(g_list)
		# results go into a separate list so the caller's ``g_list`` stays intact.
		sp_graphs = [None] * n_graphs
		if n_graphs < 100 * self._n_jobs:
			chunksize = int(n_graphs / self._n_jobs) + 1
		else:
			chunksize = 100
		# the context manager terminates the workers if anything below raises,
		# so the pool is never leaked.
		with Pool(self._n_jobs) as pool:
			iterator = get_iters(
				pool.imap_unordered(self._wrapper_get_sp_graphs,
					zip(g_list, range(0, n_graphs)), chunksize),
				desc='getting sp graphs', file=sys.stdout,
				length=n_graphs, verbose=(self._verbose >= 2))
			# results arrive in arbitrary order; the returned index restores it.
			for i, g in iterator:
				sp_graphs[i] = g
			pool.close()
			pool.join()

		# compute kernel list.
		kernel_list = [None] * n_graphs

		def init_worker(g1_toshare, gl_toshare):
			# expose the shared graphs to the worker through module globals.
			global G_g1, G_gl
			G_g1 = g1_toshare
			G_gl = gl_toshare

		def func_assign(result, var_to_assign):
			# each result is an (index, value) pair.
			var_to_assign[result[0]] = result[1]

		do_fun = self._wrapper_kernel_list_do
		itr = range(n_graphs)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=n_graphs,
			init_worker=init_worker, glbv=(g1, sp_graphs), method='imap_unordered',
			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

		return kernel_list
Exemple #3
0
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

        The shortest paths of ``g1`` are obtained with ``get_shortest_paths``;
        those of the graphs of ``g_list`` are computed in a process pool by the
        trie or the naive wrapper, depending on the compute method.

        Parameters
        ----------
        g1 : graph
            The graph every element of ``g_list`` is compared with.
        g_list : list
            The graphs to compare with ``g1``.

        Returns
        -------
        list
            A list with one slot per graph of ``g_list``. Slot ``i`` is written
            by the assignment callback from the ``(i, value)`` pairs it
            receives; slots never written stay ``None``.
        """
        # get shortest paths of g1 and each graph in g_list.
        sp1 = get_shortest_paths(g1, self.__edge_weight,
                                 self.__ds_infos['directed'])
        n_graphs = len(g_list)
        splist = [None] * n_graphs
        if n_graphs < 100 * self._n_jobs:
            chunksize = int(n_graphs / self._n_jobs) + 1
        else:
            chunksize = 100
        # get shortest path graphs of g_list
        if self.__compute_method == 'trie':
            get_sps_fun = self._wrapper_get_sps_trie
        else:
            get_sps_fun = self._wrapper_get_sps_naive
        # the context manager terminates the workers if anything below raises,
        # so the pool is never leaked.
        with Pool(self._n_jobs) as pool:
            iterator = pool.imap_unordered(get_sps_fun,
                                           zip(g_list, range(0, n_graphs)),
                                           chunksize)
            # use the same verbosity attribute as the parallel_me call below.
            if self._verbose >= 2:
                # imap_unordered has no length, so give tqdm the total.
                iterator = tqdm(iterator,
                                desc='getting shortest paths',
                                file=sys.stdout,
                                total=n_graphs)
            # results arrive in arbitrary order; the returned index restores it.
            for i, sp in iterator:
                splist[i] = sp
            pool.close()
            pool.join()

        # compute kernel list.
        kernel_list = [None] * n_graphs

        def init_worker(sp1_toshare, spl_toshare, g1_toshare, gl_toshare):
            # expose the shared data to the worker through module globals.
            global G_sp1, G_spl, G_g1, G_gl
            G_sp1 = sp1_toshare
            G_spl = spl_toshare
            G_g1 = g1_toshare
            G_gl = gl_toshare

        if self.__compute_method == 'trie':
            do_fun = self.__wrapper_ssp_do_trie
        else:
            do_fun = self._wrapper_kernel_list_do

        def func_assign(result, var_to_assign):
            # each result is an (index, value) pair.
            var_to_assign[result[0]] = result[1]

        itr = range(n_graphs)
        parallel_me(do_fun,
                    func_assign,
                    kernel_list,
                    itr,
                    len_itr=n_graphs,
                    init_worker=init_worker,
                    glbv=(sp1, splist, g1, g_list),
                    method='imap_unordered',
                    n_jobs=self._n_jobs,
                    itr_desc='calculating kernels',
                    verbose=self._verbose)

        return kernel_list
Exemple #4
0
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

		When ``self._q`` is ``None`` the eigendecomposition of the transposed
		adjacency matrix of every graph is precomputed. The kernels themselves
		are only computed when ``self._p`` is ``None`` as well; the other
		configurations are not implemented yet and leave every slot ``None``.

		Parameters
		----------
		g1 : graph
			The graph every element of ``g_list`` is compared with.
		g_list : list
			The graphs to compare with ``g1``.

		Returns
		-------
		list
			A list with one slot per graph of ``g_list``. Slot ``i`` is written
			by the assignment callback from the ``(i, value)`` pairs it receives;
			slots never written stay ``None``.
		"""
		self._check_edge_weight(g_list + [g1], self._verbose)
		self._check_graphs(g_list + [g1])
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			# The edge weight has to be passed by keyword: the second positional
			# parameter of nx.adjacency_matrix is ``nodelist``, not ``weight``.
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
			D1, P1 = np.linalg.eig(A1)
			P_list = []
			D_list = []
			if self._verbose >= 2:
				iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
			else:
				iterator = g_list
			for G in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev) # @todo: parallel?

			if self._p is None: # p is uniform distribution as default.
				q_T1 = 1 / nx.number_of_nodes(g1)
				q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel?

				def init_worker(q_T1_toshare, P1_toshare, D1_toshare, q_T_list_toshare, P_list_toshare, D_list_toshare):
					# expose the shared data to the worker through module globals.
					global G_q_T1, G_P1, G_D1, G_q_T_list, G_P_list, G_D_list
					G_q_T1 = q_T1_toshare
					G_P1 = P1_toshare
					G_D1 = D1_toshare
					G_q_T_list = q_T_list_toshare
					G_P_list = P_list_toshare
					G_D_list = D_list_toshare

				do_fun = self._wrapper_kernel_list_do

				def func_assign(result, var_to_assign):
					# each result is an (index, value) pair.
					var_to_assign[result[0]] = result[1]
				itr = range(len(g_list))
				len_itr = len(g_list)
				parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
					init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

			else: # @todo: non-uniform p is not implemented; slots stay None.
				pass
		else: # @todo: non-uniform q is not implemented; slots stay None.
			pass

		return kernel_list
Exemple #5
0
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

        When ``self._remove_totters`` is truthy, ``g1`` is transformed with
        ``untotterTransformation`` and the entries of ``g_list`` are replaced
        in place by the results of ``self._wrapper_untotter``, which is run in
        a process pool.

        Parameters
        ----------
        g1 : graph
            The graph every element of ``g_list`` is compared with.
        g_list : list
            The graphs to compare with ``g1``. Modified in place when
            ``self._remove_totters`` is truthy.

        Returns
        -------
        list
            A list with one slot per graph of ``g_list``. Slot ``i`` is written
            by the assignment callback from the ``(i, value)`` pairs it
            receives; slots never written stay ``None``.
        """
        self._add_dummy_labels(g_list + [g1])
        n_graphs = len(g_list)

        if self._remove_totters:
            g1 = untotterTransformation(
                g1, self._node_labels,
                self._edge_labels)  # @todo: this may not work.
            if n_graphs < 100 * self._n_jobs:
                chunksize = int(n_graphs / self._n_jobs) + 1
            else:
                chunksize = 100
            # the context manager terminates the workers if anything below
            # raises, so the pool is never leaked.
            with Pool(self._n_jobs) as pool:
                # the wrapper only receives indices, not the graphs themselves.
                iterator = get_iters(pool.imap_unordered(self._wrapper_untotter,
                                                         range(0, n_graphs),
                                                         chunksize),
                                     desc='removing tottering',
                                     file=sys.stdout,
                                     length=n_graphs,
                                     verbose=(self._verbose >= 2))
                # NOTE(review): kept as an in-place update because the
                # index-based wrapper suggests g_list may alias instance state
                # - confirm before switching to a fresh list.
                for i, g in iterator:
                    g_list[i] = g
                pool.close()
                pool.join()

        # compute kernel list.
        kernel_list = [None] * n_graphs

        def init_worker(g1_toshare, g_list_toshare):
            # expose the shared graphs to the worker through module globals.
            global G_g1, G_g_list
            G_g1 = g1_toshare
            G_g_list = g_list_toshare

        do_fun = self._wrapper_kernel_list_do

        def func_assign(result, var_to_assign):
            # each result is an (index, value) pair.
            var_to_assign[result[0]] = result[1]

        itr = range(n_graphs)
        parallel_me(do_fun,
                    func_assign,
                    kernel_list,
                    itr,
                    len_itr=n_graphs,
                    init_worker=init_worker,
                    glbv=(g1, g_list),
                    method='imap_unordered',
                    n_jobs=self._n_jobs,
                    itr_desc='Computing kernels',
                    verbose=self._verbose)

        return kernel_list
Exemple #6
0
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

        The canonical keys of ``g1`` are computed directly; those of the graphs
        of ``g_list`` are computed by ``self._wrapper_get_canonkeys`` in a
        process pool before the kernels are evaluated.

        Parameters
        ----------
        g1 : graph
            The graph every element of ``g_list`` is compared with.
        g_list : list
            The graphs to compare with ``g1``.

        Returns
        -------
        list
            A list with one slot per graph of ``g_list``. Slot ``i`` is written
            by the assignment callback from the ``(i, value)`` pairs it
            receives; slots never written stay ``None``.
        """
        self.__add_dummy_labels(g_list + [g1])

        # get all canonical keys of all graphs before calculating kernels to save
        # time, but this may cost a lot of memory for large dataset.
        canonkeys_1 = self.__get_canonkeys(g1)
        n_graphs = len(g_list)
        canonkeys_list = [[] for _ in range(n_graphs)]
        if n_graphs < 100 * self._n_jobs:
            chunksize = int(n_graphs / self._n_jobs) + 1
        else:
            chunksize = 100
        # the context manager terminates the workers if anything below raises,
        # so the pool is never leaked.
        with Pool(self._n_jobs) as pool:
            iterator = pool.imap_unordered(self._wrapper_get_canonkeys,
                                           zip(g_list, range(0, n_graphs)),
                                           chunksize)
            if self._verbose >= 2:
                # imap_unordered has no length, so give tqdm the total.
                iterator = tqdm(iterator,
                                desc='getting canonkeys',
                                file=sys.stdout,
                                total=n_graphs)
            # results arrive in arbitrary order; the returned index restores it.
            for i, ck in iterator:
                canonkeys_list[i] = ck
            pool.close()
            pool.join()

        # compute kernel list.
        kernel_list = [None] * n_graphs

        def init_worker(ck_1_toshare, ck_list_toshare):
            # expose the shared keys to the worker through module globals.
            global G_ck_1, G_ck_list
            G_ck_1 = ck_1_toshare
            G_ck_list = ck_list_toshare

        do_fun = self._wrapper_kernel_list_do

        def func_assign(result, var_to_assign):
            # each result is an (index, value) pair.
            var_to_assign[result[0]] = result[1]

        itr = range(n_graphs)
        parallel_me(do_fun,
                    func_assign,
                    kernel_list,
                    itr,
                    len_itr=n_graphs,
                    init_worker=init_worker,
                    glbv=(canonkeys_1, canonkeys_list),
                    method='imap_unordered',
                    n_jobs=self._n_jobs,
                    itr_desc='calculating kernels',
                    verbose=self._verbose)

        return kernel_list
Exemple #7
0
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

        All graphs are first relabelled with consecutive integers starting at
        0. The kernels are only computed when both ``self._p`` and ``self._q``
        are ``None``; otherwise every slot of the result stays ``None``.

        Parameters
        ----------
        g1 : graph
            The graph every element of ``g_list`` is compared with.
        g_list : list
            The graphs to compare with ``g1``.

        Returns
        -------
        list
            A list with one slot per graph of ``g_list``. Slot ``i`` is written
            by the assignment callback from the ``(i, value)`` pairs it
            receives.
        """
        all_graphs = g_list + [g1]
        self._check_edge_weight(all_graphs, self._verbose)
        self._check_graphs(g_list + [g1])

        # Reindex nodes using consecutive integers for the convenience of
        # kernel computation.
        relabel_options = {'first_label': 0, 'label_attribute': 'label_orignal'}
        g1 = nx.convert_node_labels_to_integers(g1, **relabel_options)
        # @todo: parallel this.
        progress = get_iters(g_list,
                             desc='Reindex vertices',
                             file=sys.stdout,
                             verbose=(self._verbose >= 2))
        g_list = [
            nx.convert_node_labels_to_integers(graph, **relabel_options)
            for graph in progress
        ]

        # one result slot per graph.
        n_graphs = len(g_list)
        kernel_list = [None] * n_graphs

        # @todo: only uniform p and q (both None) are handled so far.
        if self._p is not None or self._q is not None:
            return kernel_list

        def init_worker(shared_g1, shared_g_list):
            # publish the shared graphs as module globals for the workers.
            global G_g1, G_g_list
            G_g1 = shared_g1
            G_g_list = shared_g_list

        def store_result(result, target):
            # result is an (index, value) pair.
            index, value = result[0], result[1]
            target[index] = value

        parallel_me(self._wrapper_kernel_list_do,
                    store_result,
                    kernel_list,
                    range(n_graphs),
                    len_itr=n_graphs,
                    init_worker=init_worker,
                    glbv=(g1, g_list),
                    method='imap_unordered',
                    n_jobs=self._n_jobs,
                    itr_desc='Computing kernels',
                    verbose=self._verbose)
        return kernel_list
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

		The paths of ``g1`` are computed directly; those of the graphs of
		``g_list`` are computed in a process pool. Which path finder is used
		depends on ``self._compute_method`` and on whether ``self._k_func`` is
		``None``.

		Parameters
		----------
		g1 : graph
			The graph every element of ``g_list`` is compared with.
		g_list : list
			The graphs to compare with ``g1``.

		Returns
		-------
		list
			A list with one slot per graph of ``g_list``. Slot ``i`` is written
			by the assignment callback from the ``(i, value)`` pairs it receives;
			slots never written stay ``None``.
		"""
		self._add_dummy_labels(g_list + [g1])

		# get all paths of all graphs before computing kernels to save time,
		# but this may cost a lot of memory for large datasets.
		n_graphs = len(g_list)
		if n_graphs < 100 * self._n_jobs:
			chunksize = int(n_graphs / self._n_jobs) + 1
		else:
			chunksize = 100
		paths_g_list = [[] for _ in range(n_graphs)]
		if self._compute_method == 'trie' and self._k_func is not None:
			paths_g1 = self._find_all_path_as_trie(g1)
			get_ps_fun = self._wrapper_find_all_path_as_trie
		elif self._compute_method != 'trie' and self._k_func is not None:
			paths_g1 = self._find_all_paths_until_length(g1)
			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
		else:
			paths_g1 = self._find_all_paths_until_length(g1)
			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
		# the context manager terminates the workers if anything below raises,
		# so the pool is never leaked.
		with Pool(self._n_jobs) as pool:
			iterator = get_iters(
				pool.imap_unordered(get_ps_fun, zip(g_list, range(0, n_graphs)), chunksize),
				desc='getting paths', file=sys.stdout,
				length=n_graphs, verbose=(self._verbose >= 2))
			# results arrive in arbitrary order; the returned index restores it.
			for i, ps in iterator:
				paths_g_list[i] = ps
			pool.close()
			pool.join()

		# compute kernel list.
		kernel_list = [None] * n_graphs

		def init_worker(p1_toshare, plist_toshare):
			# expose the shared paths to the worker through module globals.
			global G_p1, G_plist
			G_p1 = p1_toshare
			G_plist = plist_toshare

		def func_assign(result, var_to_assign):
			# each result is an (index, value) pair.
			var_to_assign[result[0]] = result[1]

		do_fun = self._wrapper_kernel_list_do
		itr = range(n_graphs)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=n_graphs,
			init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered',
			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

		return kernel_list
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute the kernels between ``g1`` and every graph of ``g_list`` in parallel.

		When ``self._q`` is ``None`` the transposed adjacency matrix of every
		graph is precomputed. The kernels themselves are only computed when
		``self._p`` is ``None`` as well; the other configurations are not
		implemented yet and leave every slot ``None``.

		Parameters
		----------
		g1 : graph
			The graph every element of ``g_list`` is compared with.
		g_list : list
			The graphs to compare with ``g1``.

		Returns
		-------
		list
			A list with one slot per graph of ``g_list``. Slot ``i`` is written
			by the assignment callback from the ``(i, value)`` pairs it receives;
			slots never written stay ``None``.
		"""
		self._check_edge_weight(g_list + [g1], self._verbose)
		self._check_graphs(g_list + [g1])
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored.')

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		if self._q is None:
			# don't normalize adjacency matrices if q is a uniform vector. Note
			# A_wave_list actually contains the transposes of the adjacency matrices.
			# The edge weight has to be passed by keyword: the second positional
			# parameter of nx.adjacency_matrix is ``nodelist``, not ``weight``.
			A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
			iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
			A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?

			if self._p is None: # p is uniform distribution as default.
				def init_worker(A_wave_1_toshare, A_wave_list_toshare):
					# expose the shared matrices to the worker through module globals.
					global G_A_wave_1, G_A_wave_list
					G_A_wave_1 = A_wave_1_toshare
					G_A_wave_list = A_wave_list_toshare

				do_fun = self._wrapper_kernel_list_do

				def func_assign(result, var_to_assign):
					# each result is an (index, value) pair.
					var_to_assign[result[0]] = result[1]
				itr = range(len(g_list))
				len_itr = len(g_list)
				parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
					init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered',
					n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

			else: # @todo: non-uniform p is not implemented; slots stay None.
				pass
		else: # @todo: non-uniform q is not implemented; slots stay None.
			pass

		return kernel_list
Exemple #10
0
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute the kernels between ``g1`` and every graph of ``g_list``.

		Only the ``'subtree'`` base kernel is computed in parallel. For any
		other base kernel the call is forwarded to
		``_compute_kernel_list_series`` (with a warning when
		``self._verbose >= 2``).

		Parameters
		----------
		g1 : graph
			The graph every element of ``g_list`` is compared with.
		g_list : list
			The graphs to compare with ``g1``.

		Returns
		-------
		list
			In the parallel case, a list with one slot per graph of ``g_list``,
			where slot ``i`` is written by the assignment callback from the
			``(i, value)`` pairs it receives. Otherwise, whatever
			``_compute_kernel_list_series`` returns.
		"""
		self._add_dummy_node_labels(g_list + [g1])

		# serial fallback for every base kernel except 'subtree'.
		if self._base_kernel != 'subtree':
			if self._verbose >= 2:
				import warnings
				warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
			return self._compute_kernel_list_series(g1, g_list)

		n_graphs = len(g_list)
		kernel_list = [None] * n_graphs

		def init_worker(shared_g1, shared_g_list):
			# publish the shared graphs as module globals for the workers.
			global G_g1, G_g_list
			G_g1 = shared_g1
			G_g_list = shared_g_list

		def store_result(result, target):
			# result is an (index, value) pair.
			index, value = result[0], result[1]
			target[index] = value

		parallel_me(self._wrapper_kernel_list_do, store_result, kernel_list,
			range(n_graphs), len_itr=n_graphs,
			init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
		return kernel_list