# Example 1
    def _compute_kernel_list_series(self, g1, g_list):
        """Compute marginalized kernels between g1 and every graph in g_list.

        Returns a list where entry i is the kernel between g1 and g_list[i].
        """
        self._add_dummy_labels(g_list + [g1])

        if self._remove_totters:
            # @todo: this may not work.
            g1 = untotterTransformation(g1, self._node_labels,
                                        self._edge_labels)
            tot_iter = get_iters(g_list, desc='removing tottering',
                                 file=sys.stdout,
                                 verbose=(self._verbose >= 2))
            # @todo: this may not work.
            g_list = [untotterTransformation(g, self._node_labels,
                                             self._edge_labels)
                      for g in tot_iter]

        # One kernel per graph in g_list.
        kernel_list = [None] * len(g_list)
        idx_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                             file=sys.stdout, length=len(g_list),
                             verbose=(self._verbose >= 2))
        for idx in idx_iter:
            kernel_list[idx] = self._kernel_do(g1, g_list[idx])

        return kernel_list
	def _compute_kernel_list_series(self, g1, g_list):
		"""Compute random-walk kernels between g1 and each graph in g_list,
		serially.

		Returns a list where entry i is the kernel between g1 and g_list[i].
		"""
		self._check_edge_weight(g_list + [g1], self._verbose)
		self._check_graphs(g_list + [g1])
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored.')

		lmda = self._weight

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		if self._q is None:
			# don't normalize adjacency matrices if q is a uniform vector. Note
			# A_wave_list actually contains the transposes of the adjacency matrices.
			# BUG FIX: the edge weight must be passed as the `weight` keyword;
			# the second positional argument of nx.adjacency_matrix is `nodelist`.
			A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
			iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
			A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator]

			if self._p is None: # p is uniform distribution as default.
				iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))

				for i in iterator:
					kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda)
					kernel_list[i] = kernel

			else: # @todo
				pass
		else: # @todo
			pass

		return kernel_list
# Example 3
    def _compute_kernel_list_series(self, g1, g_list):
        """Compute structural shortest-path kernels between g1 and each graph
        in g_list, serially."""
        # Shortest paths of g1 and of every graph in g_list.
        sp1 = get_shortest_paths(g1, self._edge_weight,
                                 self._ds_infos['directed'])
        sp_iter = get_iters(g_list, desc='getting sp graphs', file=sys.stdout,
                            verbose=(self._verbose >= 2))
        use_trie = (self._compute_method == 'trie')
        if use_trie:
            splist = [self._get_sps_as_trie(g) for g in sp_iter]
        else:
            splist = [get_shortest_paths(g, self._edge_weight,
                                         self._ds_infos['directed'])
                      for g in sp_iter]

        # One kernel per graph in g_list.
        kernel_list = [None] * len(g_list)
        idx_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                             file=sys.stdout, length=len(g_list),
                             verbose=(self._verbose >= 2))
        do_kernel = self._ssp_do_trie if use_trie else self._ssp_do_naive
        for idx in idx_iter:
            kernel_list[idx] = do_kernel(g1, g_list[idx], sp1, splist[idx])

        return kernel_list
# Example 4
    def _compute_gm_series(self):
        """Compute the marginalized-kernel Gram matrix over self._graphs."""
        self._add_dummy_labels(self._graphs)

        if self._remove_totters:
            tot_iter = get_iters(self._graphs, desc='removing tottering',
                                 file=sys.stdout,
                                 verbose=(self._verbose >= 2))
            # @todo: this may not work.
            self._graphs = [untotterTransformation(g, self._node_labels,
                                                   self._edge_labels)
                            for g in tot_iter]

        # Fill the symmetric Gram matrix pair by pair.
        n = len(self._graphs)
        gram_matrix = np.zeros((n, n))

        from itertools import combinations_with_replacement
        pairs = combinations_with_replacement(range(n), 2)
        n_pairs = n * (n + 1) // 2
        pair_iter = get_iters(pairs, desc='Computing kernels', file=sys.stdout,
                              length=n_pairs, verbose=(self._verbose >= 2))
        for i, j in pair_iter:
            k = self._kernel_do(self._graphs[i], self._graphs[j])
            gram_matrix[i][j] = k
            gram_matrix[j][i] = k  # @todo: no directed graph considered?

        return gram_matrix
# Example 5
def compute_D(G_app, edit_cost, G_test=None, ed_method='BIPARTITE', **kwargs):
    """Compute pairwise GED distance matrices.

    Returns (D_app, edit_cost) when G_test is None, otherwise
    (D_app, D_test, edit_cost).
    """
    import numpy as np
    n_app = len(G_app)
    D_app = np.zeros((n_app, n_app))

    app_iter = get_iters(enumerate(G_app), desc='Computing D - app',
                         file=sys.stdout, length=n_app)
    for i, g_i in app_iter:
        # Only the upper triangle is computed; the matrix is symmetric.
        for j, g_j in enumerate(G_app[i + 1:], i + 1):
            dist, _ = compute_ged(g_i, g_j, edit_cost, method=ed_method,
                                  **kwargs)
            D_app[i, j] = dist
            D_app[j, i] = dist

    if G_test is None:
        return D_app, edit_cost

    D_test = np.zeros((len(G_test), n_app))
    test_iter = get_iters(enumerate(G_test), desc='Computing D - test',
                          file=sys.stdout, length=len(G_test))
    for i, g_i in test_iter:
        for j, g_j in enumerate(G_app):
            D_test[i, j], _ = compute_ged(g_i, g_j, edit_cost,
                                          method=ed_method, **kwargs)
    return D_app, D_test, edit_cost
# Example 6
    def _compute_gm_series(self):
        """Compute the treelet-kernel Gram matrix over self._graphs."""
        self._add_dummy_labels(self._graphs)

        # Precompute the canonical keys of every graph; this trades memory for
        # time on large datasets.
        key_iter = get_iters(self._graphs, desc='getting canonkeys',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        canonkeys = [self._get_canonkeys(g) for g in key_iter]

        # Fill the symmetric Gram matrix pair by pair.
        n = len(self._graphs)
        gram_matrix = np.zeros((n, n))

        from itertools import combinations_with_replacement
        pairs = combinations_with_replacement(range(n), 2)
        n_pairs = n * (n + 1) // 2
        pair_iter = get_iters(pairs, desc='Computing kernels', file=sys.stdout,
                              length=n_pairs, verbose=(self._verbose >= 2))
        for i, j in pair_iter:
            k = self._kernel_do(canonkeys[i], canonkeys[j])
            gram_matrix[i][j] = k
            gram_matrix[j][i] = k  # @todo: no directed graph considered?

        return gram_matrix
# Example 7
	def _compute_gm_series(self):
		"""Compute the path-kernel Gram matrix over self._graphs, serially."""
		self._add_dummy_labels(self._graphs)

		from itertools import combinations_with_replacement
		n = len(self._graphs)
		path_iter = get_iters(range(n), desc='getting paths', file=sys.stdout,
					length=n, verbose=(self._verbose >= 2))
		pair_iter = get_iters(combinations_with_replacement(range(n), 2),
					desc='Computing kernels', file=sys.stdout,
					length=n * (n + 1) // 2, verbose=(self._verbose >= 2))

		gram_matrix = np.zeros((n, n))

		# Gather all paths of every graph, then fill the symmetric matrix.
		if self._compute_method == 'trie':
			all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in path_iter]
			kernel_fun = self._kernel_do_trie
		else:
			all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in path_iter]
			kernel_fun = self._kernel_do_naive
		for i, j in pair_iter:
			k = kernel_fun(all_paths[i], all_paths[j])
			gram_matrix[i][j] = k
			gram_matrix[j][i] = k

		return gram_matrix
# Example 8
    def _compute_kernel_list_series(self, g1, g_list):
        """Compute treelet kernels between g1 and each graph in g_list.

        Returns a list where entry i is the kernel between g1 and g_list[i].
        """
        self._add_dummy_labels(g_list + [g1])

        # Precompute canonical keys; trades memory for time on large datasets.
        canonkeys_1 = self._get_canonkeys(g1)
        key_iter = get_iters(g_list, desc='getting canonkeys', file=sys.stdout,
                             verbose=(self._verbose >= 2))
        canonkeys_list = [self._get_canonkeys(g) for g in key_iter]

        # One kernel per graph in g_list.
        kernel_list = [None] * len(g_list)
        idx_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                             file=sys.stdout, length=len(g_list),
                             verbose=(self._verbose >= 2))
        for idx in idx_iter:
            kernel_list[idx] = self._kernel_do(canonkeys_1, canonkeys_list[idx])

        return kernel_list
# Example 9
def parallel_me(func,
                func_assign,
                var_to_assign,
                itr,
                len_itr=None,
                init_worker=None,
                glbv=None,
                method=None,
                n_jobs=None,
                chunksize=None,
                itr_desc='',
                verbose=True):
    """Run `func` over `itr` in a process pool and fold results into
    `var_to_assign`.

    Parameters
    ----------
    func : callable
        Worker function applied to each item of `itr`.
    func_assign : callable
        Called as ``func_assign(result, var_to_assign)`` for every result.
    var_to_assign : object
        Mutable accumulator updated in place by `func_assign`.
    itr : iterable
        Items to process.
    len_itr : int, optional
        Length of `itr`; used for progress display and chunk sizing.
    init_worker : callable, optional
        Pool initializer; only used when `glbv` is given.
    glbv : tuple, optional
        Arguments passed to `init_worker` in each worker process (typically
        shared read-only data).
    method : str, optional
        Only ``'imap_unordered'`` is implemented; any other value is a no-op.
    n_jobs : int, optional
        Number of worker processes; defaults to the CPU count.
    chunksize : int, optional
        Chunk size for ``imap_unordered``; derived from `len_itr` if omitted.
    itr_desc : str
        Progress-bar description.
    verbose : int or bool
        Progress is shown when ``verbose >= 2``.
    """
    if method != 'imap_unordered':
        return  # @todo: other methods are not implemented.

    if n_jobs is None:
        n_jobs = multiprocessing.cpu_count()

    # An initializer is only installed when global variables must be shared;
    # this collapses the two previously duplicated branches.
    pool_kwargs = {}
    if glbv:  # global variables required.
        pool_kwargs = {'initializer': init_worker, 'initargs': glbv}

    with Pool(processes=n_jobs, **pool_kwargs) as pool:
        if chunksize is None:
            # Small jobs get evenly split chunks; large jobs use a fixed size.
            if len_itr < 100 * n_jobs:
                chunksize = int(len_itr / n_jobs) + 1
            else:
                chunksize = 100
        iterator = get_iters(pool.imap_unordered(func, itr, chunksize),
                             desc=itr_desc,
                             file=sys.stdout,
                             length=len_itr,
                             verbose=(verbose >= 2))
        for result in iterator:
            func_assign(result, var_to_assign)
    # The `with` block already terminated the pool; close/join are kept as
    # harmless no-ops to mirror the original shutdown sequence.
    pool.close()
    pool.join()
# Example 10
	def _compute_kernel_list_series(self, g1, g_list):
		"""Compute shortest-path kernels between g1 and each graph in g_list."""
		self._all_graphs_have_edges([g1] + g_list)
		# Turn g1 and every graph in g_list into its shortest-path graph.
		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
		sp_iter = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
		g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in sp_iter]

		# One kernel per graph in g_list.
		kernel_list = [None] * len(g_list)
		idx_iter = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
		for idx in idx_iter:
			kernel_list[idx] = self._sp_do(g1, g_list[idx])

		return kernel_list
# Example 11
	def _compute_gm_imap_unordered(self):
		"""Compute the shortest-path Gram matrix in parallel.

		Returns
		-------
		numpy.ndarray
			Symmetric matrix of pairwise kernels over ``self._graphs``.
		"""
		self._all_graphs_have_edges(self._graphs)
		# get shortest path graph of each graph.
		pool = Pool(self._n_jobs)
		get_sp_graphs_fun = self._wrapper_get_sp_graphs
		itr = zip(self._graphs, range(0, len(self._graphs)))
		# Heuristic chunk size: split evenly for small datasets, cap at 100.
		if len(self._graphs) < 100 * self._n_jobs:
			chunksize = int(len(self._graphs) / self._n_jobs) + 1
		else:
			chunksize = 100
		iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize),
						desc='getting sp graphs', file=sys.stdout,
						length=len(self._graphs), verbose=(self._verbose >= 2))
		# Results arrive out of order; the returned index restores placement.
		for i, g in iterator:
			self._graphs[i] = g
		pool.close()
		pool.join()

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		# Share the graph list with worker processes via a module-level global.
		def init_worker(gs_toshare):
			global G_gs
			G_gs = gs_toshare
		do_fun = self._wrapper_sp_do
		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
					glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)

		return gram_matrix
# Example 12
	def _compute_kernel_list_series(self, g1, g_list):
		"""Compute common-walk kernels between g1 and each graph in g_list."""
		self._check_graphs(g_list + [g1])
		self._add_dummy_labels(g_list + [g1])
		if not self._ds_infos['directed']:  #  convert
			g1 = g1.to_directed()
			g_list = [g.to_directed() for g in g_list]

		kernel_list = [None] * len(g_list)
		if self._verbose >= 2:
			idx_iter = get_iters(range(len(g_list)), desc='Computing kernels',
						 file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
		else:
			idx_iter = range(len(g_list))

		# Pick the per-pair kernel: exponential or geometric series variant.
		if self._compute_method == 'exp':
			kernel_fun = self._kernel_do_exp
		elif self._compute_method == 'geo':
			kernel_fun = self._kernel_do_geo
		else:
			# Unknown method: entries stay None, matching the original flow.
			return kernel_list

		for idx in idx_iter:
			kernel_list[idx] = kernel_fun(g1, g_list[idx], self._weight)

		return kernel_list
# Example 13
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute shortest-path kernels between g1 and each graph in g_list,
		in parallel via ``imap_unordered``.

		Returns a list where entry i is the kernel between g1 and g_list[i].
		"""
		self._all_graphs_have_edges([g1] + g_list)
		# get shortest path graphs of g1 and each graph in g_list.
		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
		pool = Pool(self._n_jobs)
		get_sp_graphs_fun = self._wrapper_get_sp_graphs
		itr = zip(g_list, range(0, len(g_list)))
		# Heuristic chunk size: split evenly for small lists, cap at 100.
		if len(g_list) < 100 * self._n_jobs:
			chunksize = int(len(g_list) / self._n_jobs) + 1
		else:
			chunksize = 100
		iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize),
						desc='getting sp graphs', file=sys.stdout,
						length=len(g_list), verbose=(self._verbose >= 2))
		# Results arrive out of order; the returned index restores placement.
		for i, g in iterator:
			g_list[i] = g
		pool.close()
		pool.join()

		# compute kernel list (one entry per graph in g_list).
		kernel_list = [None] * len(g_list)

		# Share g1 and the graph list with workers via module-level globals.
		def init_worker(g1_toshare, gl_toshare):
			global G_g1, G_gl
			G_g1 = g1_toshare
			G_gl = gl_toshare
		do_fun = self._wrapper_kernel_list_do
		# Each worker returns (index, kernel); store it at its index.
		def func_assign(result, var_to_assign):
			var_to_assign[result[0]] = result[1]
		itr = range(len(g_list))
		len_itr = len(g_list)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
			init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

		return kernel_list
# Example 14
	def _compute_gm_series(self):
		"""Compute the common-walk Gram matrix over self._graphs, serially."""
		self._check_graphs(self._graphs)
		self._add_dummy_labels(self._graphs)
		if not self._ds_infos['directed']:  #  convert
			self._graphs = [g.to_directed() for g in self._graphs]

		n = len(self._graphs)
		gram_matrix = np.zeros((n, n))

		from itertools import combinations_with_replacement
		pairs = combinations_with_replacement(range(n), 2)
		pair_iter = get_iters(pairs, desc='Computing kernels', file=sys.stdout,
					length=n * (n + 1) // 2, verbose=(self._verbose >= 2))

		# Pick the per-pair kernel: exponential or geometric series variant.
		if self._compute_method == 'exp':
			kernel_fun = self._kernel_do_exp
		elif self._compute_method == 'geo':
			kernel_fun = self._kernel_do_geo
		else:
			# Unknown method: matrix stays zero, matching the original flow.
			return gram_matrix

		for i, j in pair_iter:
			k = kernel_fun(self._graphs[i], self._graphs[j], self._weight)
			gram_matrix[i][j] = k
			gram_matrix[j][i] = k

		return gram_matrix
	def _compute_gm_imap_unordered(self):
		"""Compute the random-walk Gram matrix in parallel.

		Returns
		-------
		numpy.ndarray
			Symmetric matrix of pairwise kernels over ``self._graphs``.
		"""
		self._check_edge_weight(self._graphs, self._verbose)
		self._check_graphs(self._graphs)
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored.')

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		if self._q is None:
			# don't normalize adjacency matrices if q is a uniform vector. Note
			# A_wave_list actually contains the transposes of the adjacency matrices.
			iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
			# BUG FIX: pass the edge weight as the `weight` keyword; the second
			# positional argument of nx.adjacency_matrix is `nodelist`.
			A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?

			if self._p is None: # p is uniform distribution as default.
				# Share the adjacency matrices with workers via a module global.
				def init_worker(A_wave_list_toshare):
					global G_A_wave_list
					G_A_wave_list = A_wave_list_toshare

				do_fun = self._wrapper_kernel_do

				parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
							glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose)

			else: # @todo
				pass
		else: # @todo
			pass

		return gram_matrix
# Example 16
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute structural shortest-path kernels between g1 and each graph
        in g_list, in parallel via ``imap_unordered``.

        Returns a list where entry i is the kernel between g1 and g_list[i].
        """
        # get shortest paths of g1 and each graph in g_list.
        sp1 = get_shortest_paths(g1, self._edge_weight,
                                 self._ds_infos['directed'])
        splist = [None] * len(g_list)
        pool = Pool(self._n_jobs)
        itr = zip(g_list, range(0, len(g_list)))
        # Heuristic chunk size: split evenly for small lists, cap at 100.
        if len(g_list) < 100 * self._n_jobs:
            chunksize = int(len(g_list) / self._n_jobs) + 1
        else:
            chunksize = 100
        # get shortest path graphs of g_list
        if self._compute_method == 'trie':
            get_sps_fun = self._wrapper_get_sps_trie
        else:
            get_sps_fun = self._wrapper_get_sps_naive
        iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize),
                             desc='getting shortest paths',
                             file=sys.stdout,
                             length=len(g_list),
                             verbose=(self._verbose >= 2))
        # Results arrive out of order; the returned index restores placement.
        for i, sp in iterator:
            splist[i] = sp
        pool.close()
        pool.join()

        # compute kernel list (one entry per graph in g_list).
        kernel_list = [None] * len(g_list)

        # Share the shortest paths and graphs with workers via module globals.
        def init_worker(sp1_toshare, spl_toshare, g1_toshare, gl_toshare):
            global G_sp1, G_spl, G_g1, G_gl
            G_sp1 = sp1_toshare
            G_spl = spl_toshare
            G_g1 = g1_toshare
            G_gl = gl_toshare

        # NOTE(review): the trie branch uses _wrapper_ssp_do_trie while the
        # naive branch uses _wrapper_kernel_list_do — confirm this asymmetry
        # is intended.
        if self._compute_method == 'trie':
            do_fun = self._wrapper_ssp_do_trie
        else:
            do_fun = self._wrapper_kernel_list_do

        # Each worker returns (index, kernel); store it at its index.
        def func_assign(result, var_to_assign):
            var_to_assign[result[0]] = result[1]

        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun,
                    func_assign,
                    kernel_list,
                    itr,
                    len_itr=len_itr,
                    init_worker=init_worker,
                    glbv=(sp1, splist, g1, g_list),
                    method='imap_unordered',
                    n_jobs=self._n_jobs,
                    itr_desc='Computing kernels',
                    verbose=self._verbose)

        return kernel_list
# Example 17
	def _compute_gm_series(self):
		"""Compute the spectral-decomposition random-walk Gram matrix, serially.

		Returns
		-------
		numpy.ndarray
			Symmetric matrix of pairwise kernels over ``self._graphs``.
		"""
		self._check_edge_weight(self._graphs, self._verbose)
		self._check_graphs(self._graphs)
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			P_list = []
			D_list = []
			iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2))
			for G in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				# BUG FIX: pass the edge weight as the `weight` keyword; the
				# second positional argument of nx.adjacency_matrix is `nodelist`.
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)  # eigenvalues
				P_list.append(ev)  # eigenvectors
			# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?

			if self._p is None: # p is uniform distribution as default.
				q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs]
				# q_T_list = [q.T for q in q_list]

				from itertools import combinations_with_replacement
				itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
				len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
				iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))

				for i, j in iterator:
					kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
					gram_matrix[i][j] = kernel
					gram_matrix[j][i] = kernel

			else: # @todo
				pass
		else: # @todo
			pass

		return gram_matrix
# Example 18
    def _compute_gm_series(self):
        """Compute the structural shortest-path Gram matrix, serially."""
        # Shortest paths of every graph.
        sp_iter = get_iters(self._graphs, desc='getting sp graphs',
                            file=sys.stdout, verbose=(self._verbose >= 2))
        use_trie = (self._compute_method == 'trie')
        if use_trie:
            splist = [self._get_sps_as_trie(g) for g in sp_iter]
        else:
            splist = [get_shortest_paths(g, self._edge_weight,
                                         self._ds_infos['directed'])
                      for g in sp_iter]

        # Fill the symmetric Gram matrix pair by pair.
        n = len(self._graphs)
        gram_matrix = np.zeros((n, n))

        from itertools import combinations_with_replacement
        pairs = combinations_with_replacement(range(n), 2)
        pair_iter = get_iters(pairs, desc='Computing kernels', file=sys.stdout,
                              length=n * (n + 1) // 2,
                              verbose=(self._verbose >= 2))
        do_kernel = self._ssp_do_trie if use_trie else self._ssp_do_naive
        for i, j in pair_iter:
            k = do_kernel(self._graphs[i], self._graphs[j],
                          splist[i], splist[j])
            gram_matrix[i][j] = k
            gram_matrix[j][i] = k

        return gram_matrix
# Example 19
    def _compute_gm_series(self):
        """Compute the random-walk Gram matrix over self._graphs, serially."""
        self._check_edge_weight(self._graphs, self._verbose)
        self._check_graphs(self._graphs)

        lmda = self._weight

        n = len(self._graphs)
        gram_matrix = np.zeros((n, n))

        # Reindex nodes with consecutive integers for kernel computation.
        reindex_iter = get_iters(self._graphs, desc='Reindex vertices',
                                 file=sys.stdout,
                                 verbose=(self._verbose >= 2))
        self._graphs = [
            nx.convert_node_labels_to_integers(
                g, first_label=0, label_attribute='label_orignal')
            for g in reindex_iter
        ]

        if self._p is None and self._q is None:  # p and q are uniform distributions as default.
            from itertools import combinations_with_replacement
            pairs = combinations_with_replacement(range(n), 2)
            pair_iter = get_iters(pairs, desc='Computing kernels',
                                  file=sys.stdout, length=n * (n + 1) // 2,
                                  verbose=(self._verbose >= 2))
            for i, j in pair_iter:
                k = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
                gram_matrix[i][j] = k
                gram_matrix[j][i] = k
        else:  # @todo
            pass

        return gram_matrix
	def _compute_gm_series(self):
		"""Compute the random-walk Gram matrix over self._graphs, serially.

		Returns
		-------
		numpy.ndarray
			Symmetric matrix of pairwise kernels.
		"""
		self._check_edge_weight(self._graphs, self._verbose)
		self._check_graphs(self._graphs)
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored.')

		lmda = self._weight

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		if self._q is None:
			# don't normalize adjacency matrices if q is a uniform vector. Note
			# A_wave_list actually contains the transposes of the adjacency matrices.
			iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
			# BUG FIX: pass the edge weight as the `weight` keyword; the second
			# positional argument of nx.adjacency_matrix is `nodelist`.
			A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator]

			if self._p is None: # p is uniform distribution as default.
				from itertools import combinations_with_replacement
				itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
				len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
				iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))

				for i, j in iterator:
					kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda)
					gram_matrix[i][j] = kernel
					gram_matrix[j][i] = kernel

			else: # @todo
				pass
		else: # @todo
			pass

		return gram_matrix
# Example 21
	def _compute_gm_series(self):
		"""Compute the shortest-path Gram matrix over self._graphs, serially."""
		self._all_graphs_have_edges(self._graphs)
		# Replace every graph by its shortest-path graph.
		sp_iter = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in sp_iter]

		# Fill the symmetric Gram matrix pair by pair.
		n = len(self._graphs)
		gram_matrix = np.zeros((n, n))

		from itertools import combinations_with_replacement
		pairs = combinations_with_replacement(range(n), 2)
		pair_iter = get_iters(pairs, desc='Computing kernels',
					length=n * (n + 1) // 2, file=sys.stdout, verbose=(self._verbose >= 2))
		for i, j in pair_iter:
			k = self._sp_do(self._graphs[i], self._graphs[j])
			gram_matrix[i][j] = k
			gram_matrix[j][i] = k

		return gram_matrix
# Example 22
	def _compute_gm_series(self):
		"""Run a resumable per-pair comparison-statistics pass over all graphs.

		NOTE(review): `gram_matrix` is allocated but never filled — this
		variant only records comparison statistics via `save_results` /
		`compute_stats` and returns an all-zero matrix; confirm intended.
		"""
		self._all_graphs_have_edges(self._graphs)
		# get shortest path graph of each graph.
		iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]


		# Previously saved progress; allows resuming an interrupted run.
		results = load_results(self._file_name, self._fcsp)

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		from itertools import combinations_with_replacement
		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
		iterator = get_iters(itr, desc='Computing kernels',
					length=len_itr, file=sys.stdout,verbose=(self._verbose >= 2))

		time0 = time.time()
		for i, j in iterator:
			# Skip pairs already processed in a previous (interrupted) run.
			if i > results['i'] or (i == results['i'] and j > results['j']):
				data = self._sp_do_space(self._graphs[i], self._graphs[j])
				if self._fcsp:
					results['nb_comparison'].append(data[0])
					if data[1] != {}:
						results['vk_dict_mem'].append(estimate_vk_memory(data[1],
								    nx.number_of_nodes(self._graphs[i]),
									nx.number_of_nodes(self._graphs[j])))
				else:
					results['nb_comparison'].append(data)
				results['i'] = i
				results['j'] = j

				# Checkpoint at most every 10 minutes so progress survives a crash.
				time1 = time.time()
				if time1 - time0 > 600:
					save_results(self._file_name, results)
					time0 = time1

		compute_stats(self._file_name, results)

		return gram_matrix
# Example 23
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute marginalized kernels between g1 and each graph in g_list,
        in parallel via ``imap_unordered``.

        Returns a list where entry i is the kernel between g1 and g_list[i].
        """
        self._add_dummy_labels(g_list + [g1])

        if self._remove_totters:
            # @todo: this may not work.
            g1 = untotterTransformation(g1, self._node_labels,
                                        self._edge_labels)
            pool = Pool(self._n_jobs)
            # Heuristic chunk size: split evenly for small lists, cap at 100.
            if len(g_list) < 100 * self._n_jobs:
                chunksize = int(len(g_list) / self._n_jobs) + 1
            else:
                chunksize = 100
            remove_fun = self._wrapper_untotter
            tot_iter = get_iters(pool.imap_unordered(remove_fun,
                                                     range(0, len(g_list)),
                                                     chunksize),
                                 desc='removing tottering', file=sys.stdout,
                                 length=len(g_list),
                                 verbose=(self._verbose >= 2))
            # Results arrive out of order; the returned index restores placement.
            for idx, g in tot_iter:
                g_list[idx] = g
            pool.close()
            pool.join()

        # One kernel per graph, computed by worker processes.
        kernel_list = [None] * len(g_list)

        def init_worker(g1_toshare, g_list_toshare):
            # Share g1 and the graph list with workers via module globals.
            global G_g1, G_g_list
            G_g1 = g1_toshare
            G_g_list = g_list_toshare

        def func_assign(result, var_to_assign):
            # Each worker returns (index, kernel); store it at its index.
            var_to_assign[result[0]] = result[1]

        parallel_me(self._wrapper_kernel_list_do,
                    func_assign,
                    kernel_list,
                    range(len(g_list)),
                    len_itr=len(g_list),
                    init_worker=init_worker,
                    glbv=(g1, g_list),
                    method='imap_unordered',
                    n_jobs=self._n_jobs,
                    itr_desc='Computing kernels',
                    verbose=self._verbose)

        return kernel_list
Esempio n. 24
0
	def _compute_kernel_list_series(self, g1, g_list):
		"""Serially compute kernels between ``g1`` and each graph in ``g_list``.

		Uses the spectral decomposition of each (transposed) adjacency
		matrix. All node/edge labels are ignored and only undirected
		graphs are supported.

		Parameters
		----------
		g1 : networkx graph
			The graph compared against every member of ``g_list``.
		g_list : list of networkx graphs
			Target graphs.

		Returns
		-------
		list
			``kernel_list[i]`` is the kernel between ``g1`` and ``g_list[i]``.
		"""
		self._check_edge_weight(g_list + [g1], self._verbose)
		self._check_graphs(g_list + [g1])
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			# NOTE: `weight` must be passed by keyword — passing it
			# positionally would bind it to the `nodelist` parameter of
			# nx.adjacency_matrix, silently ignoring the edge weights.
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
			D1, P1 = np.linalg.eig(A1)
			P_list = []
			D_list = []
			iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2))
			for G in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev)

			if self._p is None: # p is uniform distribution as default.
				q_T1 = 1 / nx.number_of_nodes(g1)
				q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list]
				iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))

				for i in iterator:
					kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
					kernel_list[i] = kernel

			else: # @todo: support non-uniform initial distribution p.
				pass
		else: # @todo: support non-uniform stopping distribution q.
			pass

		return kernel_list
Esempio n. 25
0
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute kernels between ``g1`` and each graph in ``g_list``,
        distributing the pairwise evaluations with ``imap_unordered``.

        Only the default case (uniform ``p`` and ``q`` distributions) is
        implemented; other cases are still @todo.
        """
        self._check_edge_weight(g_list + [g1], self._verbose)
        self._check_graphs(g_list + [g1])

        # One result slot per target graph.
        kernel_list = [None] * len(g_list)

        # Reindex nodes using consecutive integers for the convenience of
        # kernel computation.
        g1 = nx.convert_node_labels_to_integers(
            g1, first_label=0, label_attribute='label_orignal')
        # @todo: parallel this.
        reindex_iter = get_iters(g_list, desc='Reindex vertices',
                                 file=sys.stdout,
                                 verbose=(self._verbose >= 2))
        g_list = [nx.convert_node_labels_to_integers(
            g, first_label=0, label_attribute='label_orignal')
            for g in reindex_iter]

        if self._p is None and self._q is None:
            # p and q are uniform distributions as default.

            def init_worker(g1_toshare, g_list_toshare):
                # Expose the graphs to worker processes as globals.
                global G_g1, G_g_list
                G_g1 = g1_toshare
                G_g_list = g_list_toshare

            def func_assign(result, var_to_assign):
                var_to_assign[result[0]] = result[1]

            n_targets = len(g_list)
            parallel_me(self._wrapper_kernel_list_do, func_assign,
                        kernel_list, range(n_targets), len_itr=n_targets,
                        init_worker=init_worker, glbv=(g1, g_list),
                        method='imap_unordered', n_jobs=self._n_jobs,
                        itr_desc='Computing kernels',
                        verbose=self._verbose)

        else:  # @todo
            pass

        return kernel_list
Esempio n. 26
0
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute treelet kernels between ``g1`` and each graph in
        ``g_list`` using a process pool.

        Canonical keys of all graphs are extracted first (in parallel),
        then the pairwise kernels are computed via ``parallel_me``.

        Returns
        -------
        list
            ``kernel_list[i]`` is the kernel between ``g1`` and
            ``g_list[i]``.
        """
        self._add_dummy_labels(g_list + [g1])

        # get all canonical keys of all graphs before computing kernels to save
        # time, but this may cost a lot of memory for large dataset.
        canonkeys_1 = self._get_canonkeys(g1)
        canonkeys_list = [[] for _ in range(len(g_list))]
        pool = Pool(self._n_jobs)
        itr = zip(g_list, range(0, len(g_list)))
        # Roughly one chunk per worker for small inputs; a fixed chunk
        # size otherwise, to limit inter-process scheduling overhead.
        if len(g_list) < 100 * self._n_jobs:
            chunksize = int(len(g_list) / self._n_jobs) + 1
        else:
            chunksize = 100
        get_fun = self._wrapper_get_canonkeys
        iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize),
                             desc='getting canonkeys',
                             file=sys.stdout,
                             length=len(g_list),
                             verbose=(self._verbose >= 2))
        # Results arrive out of order; each carries its original index.
        for i, ck in iterator:
            canonkeys_list[i] = ck
        pool.close()
        pool.join()

        # compute kernel list.
        kernel_list = [None] * len(g_list)

        def init_worker(ck_1_toshare, ck_list_toshare):
            # Share the canonical keys with worker processes via globals.
            global G_ck_1, G_ck_list
            G_ck_1 = ck_1_toshare
            G_ck_list = ck_list_toshare

        do_fun = self._wrapper_kernel_list_do

        def func_assign(result, var_to_assign):
            # result is (index, kernel value); write it into the slot.
            var_to_assign[result[0]] = result[1]

        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun,
                    func_assign,
                    kernel_list,
                    itr,
                    len_itr=len_itr,
                    init_worker=init_worker,
                    glbv=(canonkeys_1, canonkeys_list),
                    method='imap_unordered',
                    n_jobs=self._n_jobs,
                    itr_desc='Computing kernels',
                    verbose=self._verbose)

        return kernel_list
Esempio n. 27
0
	def _compute_kernel_list_series(self, g1, g_list):
		"""Serially compute path kernels between ``g1`` and every graph in
		``g_list``, using either the trie-based or the naive method
		depending on ``self._compute_method``.
		"""
		self._add_dummy_labels(g_list + [g1])

		iterator_ps = get_iters(g_list, desc='getting paths', file=sys.stdout, verbose=(self._verbose >= 2))
		iterator_kernel = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))

		kernel_list = [None] * len(g_list)

		# Select the path extractor and the matching pairwise kernel.
		if self._compute_method == 'trie':
			extract = self._find_all_path_as_trie
			pair_kernel = self._kernel_do_trie
		else:
			extract = self._find_all_paths_until_length
			pair_kernel = self._kernel_do_naive

		paths_g1 = extract(g1)
		paths_g_list = [extract(g) for g in iterator_ps]
		for idx in iterator_kernel:
			kernel_list[idx] = pair_kernel(paths_g1, paths_g_list[idx])

		return kernel_list
Esempio n. 28
0
    def _compute_kernel_list_series(self, g1, g_list):
        """Serially compute kernels between ``g1`` and each graph in
        ``g_list`` with decay weight ``self._weight``.

        Only the default case (uniform ``p`` and ``q`` distributions) is
        implemented; other cases are still @todo.
        """
        self._check_edge_weight(g_list + [g1], self._verbose)
        self._check_graphs(g_list + [g1])

        lmda = self._weight

        # One result slot per target graph.
        kernel_list = [None] * len(g_list)

        # Reindex nodes using consecutive integers for the convenience of
        # kernel computation.
        g1 = nx.convert_node_labels_to_integers(
            g1, first_label=0, label_attribute='label_orignal')
        reindex_iter = get_iters(g_list, desc='Reindex vertices',
                                 file=sys.stdout,
                                 verbose=(self._verbose >= 2))
        g_list = [nx.convert_node_labels_to_integers(
            g, first_label=0, label_attribute='label_orignal')
            for g in reindex_iter]

        if self._p is None and self._q is None:
            # p and q are uniform distributions as default.
            kernel_iter = get_iters(range(len(g_list)),
                                    desc='Computing kernels',
                                    file=sys.stdout, length=len(g_list),
                                    verbose=(self._verbose >= 2))
            for idx in kernel_iter:
                kernel_list[idx] = self._kernel_do(g1, g_list[idx], lmda)

        else:  # @todo
            pass

        return kernel_list
Esempio n. 29
0
	def _compute_gm_imap_unordered(self):
		"""Compute the Gram matrix of ``self._graphs`` in parallel.

		Paths (or path tries) of all graphs are extracted first with a
		process pool; then the pairwise kernels are computed by
		``parallel_gm``. Which extraction/kernel wrapper is used depends
		on ``self._compute_method`` and ``self._k_func``.

		Returns
		-------
		numpy.ndarray
			The symmetric Gram matrix of shape (n_graphs, n_graphs).
		"""
		self._add_dummy_labels(self._graphs)

		# get all paths of all graphs before computing kernels to save time,
		# but this may cost a lot of memory for large datasets.
		pool = Pool(self._n_jobs)
		itr = zip(self._graphs, range(0, len(self._graphs)))
		if len(self._graphs) < 100 * self._n_jobs:
			chunksize = int(len(self._graphs) / self._n_jobs) + 1
		else:
			chunksize = 100
		all_paths = [[] for _ in range(len(self._graphs))]
		# Select the path-extraction routine: tries only when requested
		# AND a sub-kernel function is set; otherwise plain path lists.
		# NOTE(review): the boolean passed via partial presumably tells
		# the wrapper whether a sub-kernel will consume the paths —
		# confirm against _wrapper_find_all_paths_until_length.
		if self._compute_method == 'trie' and self._k_func is not None:
			get_ps_fun = self._wrapper_find_all_path_as_trie
		elif self._compute_method != 'trie' and self._k_func is not None:
			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
		else:
			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
		iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize),
						desc='getting paths', file=sys.stdout,
						length=len(self._graphs), verbose=(self._verbose >= 2))
		# Results arrive out of order; each carries its original index.
		for i, ps in iterator:
			all_paths[i] = ps
		pool.close()
		pool.join()

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		# Pick the matching pairwise-kernel wrapper; workers receive the
		# precomputed paths through a module-level global set by the
		# pool initializer.
		if self._compute_method == 'trie' and self._k_func is not None:
			def init_worker(trie_toshare):
				global G_trie
				G_trie = trie_toshare
			do_fun = self._wrapper_kernel_do_trie
		elif self._compute_method != 'trie' and self._k_func is not None:
			def init_worker(plist_toshare):
				global G_plist
				G_plist = plist_toshare
			do_fun = self._wrapper_kernel_do_naive
		else:
			def init_worker(plist_toshare):
				global G_plist
				G_plist = plist_toshare
			do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this?
		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
					glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)

		return gram_matrix
Esempio n. 30
0
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		"""Compute path kernels between ``g1`` and each graph in
		``g_list`` using a process pool.

		Paths (or path tries) of all graphs are extracted first in
		parallel, then the pairwise kernels are computed via
		``parallel_me``. The extraction routine depends on
		``self._compute_method`` and ``self._k_func``.

		Returns
		-------
		list
			``kernel_list[i]`` is the kernel between ``g1`` and
			``g_list[i]``.
		"""
		self._add_dummy_labels(g_list + [g1])

		# get all paths of all graphs before computing kernels to save time,
		# but this may cost a lot of memory for large datasets.
		pool = Pool(self._n_jobs)
		itr = zip(g_list, range(0, len(g_list)))
		if len(g_list) < 100 * self._n_jobs:
			chunksize = int(len(g_list) / self._n_jobs) + 1
		else:
			chunksize = 100
		paths_g_list = [[] for _ in range(len(g_list))]
		# g1's paths are extracted serially; the pool only handles g_list.
		# NOTE(review): the boolean passed via partial presumably tells
		# the wrapper whether a sub-kernel will consume the paths —
		# confirm against _wrapper_find_all_paths_until_length.
		if self._compute_method == 'trie' and self._k_func is not None:
			paths_g1 = self._find_all_path_as_trie(g1)
			get_ps_fun = self._wrapper_find_all_path_as_trie
		elif self._compute_method != 'trie' and self._k_func is not None:
			paths_g1 = self._find_all_paths_until_length(g1)
			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
		else:
			paths_g1 = self._find_all_paths_until_length(g1)
			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
		iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize),
						desc='getting paths', file=sys.stdout,
						length=len(g_list), verbose=(self._verbose >= 2))
		# Results arrive out of order; each carries its original index.
		for i, ps in iterator:
			paths_g_list[i] = ps
		pool.close()
		pool.join()

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		def init_worker(p1_toshare, plist_toshare):
			# Share the precomputed paths with workers via globals.
			global G_p1, G_plist
			G_p1 = p1_toshare
			G_plist = plist_toshare
		do_fun = self._wrapper_kernel_list_do
		def func_assign(result, var_to_assign):
			# result is (index, kernel value); write it into the slot.
			var_to_assign[result[0]] = result[1]
		itr = range(len(g_list))
		len_itr = len(g_list)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
			init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

		return kernel_list