def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute the marginalized kernel between ``g1`` and every
    graph in ``g_list``.

    Returns a list of kernel values, one per graph in ``g_list``.
    """
    # Label padding mutates the graphs in place, so doing it on the
    # temporary concatenated list still affects g1 and g_list members.
    self._add_dummy_labels(g_list + [g1])

    if self._remove_totters:
        g1 = untotterTransformation(
            g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
        iterator = get_iters(g_list, desc='removing tottering',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        # @todo: this may not work.
        g_list = [
            untotterTransformation(G, self._node_labels, self._edge_labels)
            for G in iterator
        ]

    # compute kernel list.
    kernel_list = [None] * len(g_list)
    iterator = get_iters(range(len(g_list)), desc='Computing kernels',
                         file=sys.stdout, length=len(g_list),
                         verbose=(self._verbose >= 2))
    for i in iterator:
        kernel = self._kernel_do(g1, g_list[i])
        kernel_list[i] = kernel

    return kernel_list
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute random-walk kernels between ``g1`` and each graph
    in ``g_list``.

    Only the uniform-``q`` / uniform-``p`` case is implemented; the other
    branches are placeholders (@todo), in which case the returned list
    keeps its ``None`` entries.
    """
    self._check_edge_weight(g_list + [g1], self._verbose)
    self._check_graphs(g_list + [g1])
    if self._verbose >= 2:
        import warnings
        warnings.warn('All labels are ignored.')
    lmda = self._weight

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    if self._q is None:
        # don't normalize adjacency matrices if q is a uniform vector. Note
        # A_wave_list actually contains the transposes of the adjacency matrices.
        A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
        iterator = get_iters(g_list, desc='compute adjacency matrices',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        A_wave_list = [
            nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
            for G in iterator
        ]
        if self._p is None:  # p is uniform distribution as default.
            iterator = get_iters(range(len(g_list)), desc='Computing kernels',
                                 file=sys.stdout, length=len(g_list),
                                 verbose=(self._verbose >= 2))
            for i in iterator:
                kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda)
                kernel_list[i] = kernel
        else:  # @todo
            pass
    else:  # @todo
        pass

    return kernel_list
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute structural shortest-path kernels between ``g1``
    and every graph in ``g_list``.

    Returns a list with one kernel value per graph in ``g_list``.
    """
    # Shortest paths of g1 and of each comparison graph.
    sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
    use_trie = (self._compute_method == 'trie')
    sp_iter = get_iters(g_list, desc='getting sp graphs', file=sys.stdout,
                        verbose=(self._verbose >= 2))
    if use_trie:
        splist = [self._get_sps_as_trie(g) for g in sp_iter]
    else:
        splist = [
            get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])
            for g in sp_iter
        ]

    # Pairwise kernels between g1 and each graph.
    kernel_list = [None] * len(g_list)
    kernel_fun = self._ssp_do_trie if use_trie else self._ssp_do_naive
    k_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                       file=sys.stdout, length=len(g_list),
                       verbose=(self._verbose >= 2))
    for idx in k_iter:
        kernel_list[idx] = kernel_fun(g1, g_list[idx], sp1, splist[idx])

    return kernel_list
def _compute_gm_series(self):
    """Serially compute the Gram matrix of the marginalized kernel over
    ``self._graphs``.

    Returns a symmetric ``(n, n)`` numpy array.
    """
    self._add_dummy_labels(self._graphs)

    if self._remove_totters:
        iterator = get_iters(self._graphs, desc='removing tottering',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        # @todo: this may not work.
        self._graphs = [
            untotterTransformation(G, self._node_labels, self._edge_labels)
            for G in iterator
        ]

    # compute Gram matrix.
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    from itertools import combinations_with_replacement
    # Upper triangle (including diagonal); the matrix is filled symmetrically.
    itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
    len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
    iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
                         length=len_itr, verbose=(self._verbose >= 2))
    for i, j in iterator:
        kernel = self._kernel_do(self._graphs[i], self._graphs[j])
        gram_matrix[i][j] = kernel
        gram_matrix[j][i] = kernel  # @todo: no directed graph considered?

    return gram_matrix
def compute_D(G_app, edit_cost, G_test=None, ed_method='BIPARTITE', **kwargs):
    """Compute pairwise graph-edit-distance matrices.

    Returns ``(D_app, edit_cost)`` when ``G_test`` is None, otherwise
    ``(D_app, D_test, edit_cost)`` where ``D_test[i, j]`` is the distance
    from ``G_test[i]`` to ``G_app[j]``.
    """
    import numpy as np

    n_app = len(G_app)
    D_app = np.zeros((n_app, n_app))

    app_iter = get_iters(enumerate(G_app), desc='Computing D - app',
                         file=sys.stdout, length=n_app)
    for i, gi in app_iter:
        # Only the strict upper triangle is computed; mirror for symmetry.
        for j, gj in enumerate(G_app[i + 1:], i + 1):
            dist, _ = compute_ged(gi, gj, edit_cost, method=ed_method, **kwargs)
            D_app[i, j] = dist
            D_app[j, i] = dist

    if G_test is None:
        return D_app, edit_cost

    n_test = len(G_test)
    D_test = np.zeros((n_test, n_app))
    test_iter = get_iters(enumerate(G_test), desc='Computing D - test',
                          file=sys.stdout, length=n_test)
    for i, gi in test_iter:
        for j, gj in enumerate(G_app):
            D_test[i, j], _ = compute_ged(gi, gj, edit_cost,
                                          method=ed_method, **kwargs)
    return D_app, D_test, edit_cost
def _compute_gm_series(self):
    """Serially compute the treelet-kernel Gram matrix over ``self._graphs``.

    Canonical keys of all graphs are precomputed first (trades memory for
    speed); the symmetric matrix is then filled over its upper triangle.
    """
    self._add_dummy_labels(self._graphs)

    # Precompute canonical keys for every graph.
    ck_iter = get_iters(self._graphs, desc='getting canonkeys',
                        file=sys.stdout, verbose=(self._verbose >= 2))
    canonkeys = [self._get_canonkeys(g) for g in ck_iter]

    n = len(self._graphs)
    gram_matrix = np.zeros((n, n))

    from itertools import combinations_with_replacement
    pairs = combinations_with_replacement(range(0, n), 2)
    len_itr = int(n * (n + 1) / 2)
    pair_iter = get_iters(pairs, desc='Computing kernels', file=sys.stdout,
                          length=len_itr, verbose=(self._verbose >= 2))
    for i, j in pair_iter:
        k = self._kernel_do(canonkeys[i], canonkeys[j])
        gram_matrix[i][j] = k
        gram_matrix[j][i] = k  # @todo: no directed graph considered?

    return gram_matrix
def _compute_gm_series(self):
    """Serially compute the path-kernel Gram matrix over ``self._graphs``.

    All paths of every graph are extracted first, then pairwise kernels
    are computed over the upper triangle of the symmetric matrix.
    """
    self._add_dummy_labels(self._graphs)

    from itertools import combinations_with_replacement
    itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
    iterator_ps = get_iters(range(0, len(self._graphs)), desc='getting paths',
                            file=sys.stdout, length=len(self._graphs),
                            verbose=(self._verbose >= 2))
    len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
    iterator_kernel = get_iters(itr_kernel, desc='Computing kernels',
                                file=sys.stdout, length=len_itr,
                                verbose=(self._verbose >= 2))

    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    if self._compute_method == 'trie':
        # Paths stored as tries.
        all_paths = [self._find_all_path_as_trie(self._graphs[i])
                     for i in iterator_ps]
        for i, j in iterator_kernel:
            kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
            gram_matrix[i][j] = kernel
            gram_matrix[j][i] = kernel
    else:
        # Paths stored as plain lists.
        all_paths = [self._find_all_paths_until_length(self._graphs[i])
                     for i in iterator_ps]
        for i, j in iterator_kernel:
            kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
            gram_matrix[i][j] = kernel
            gram_matrix[j][i] = kernel

    return gram_matrix
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute treelet kernels between ``g1`` and each graph in
    ``g_list``.

    Canonical keys are precomputed for every graph; this may cost a lot of
    memory for large datasets but avoids recomputation.
    """
    self._add_dummy_labels(g_list + [g1])

    canonkeys_1 = self._get_canonkeys(g1)
    ck_iter = get_iters(g_list, desc='getting canonkeys', file=sys.stdout,
                        verbose=(self._verbose >= 2))
    canonkeys_list = [self._get_canonkeys(g) for g in ck_iter]

    # One kernel value per graph in g_list.
    kernel_list = [None] * len(g_list)
    k_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                       file=sys.stdout, length=len(g_list),
                       verbose=(self._verbose >= 2))
    for idx in k_iter:
        kernel_list[idx] = self._kernel_do(canonkeys_1, canonkeys_list[idx])

    return kernel_list
def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None,
                init_worker=None, glbv=None, method=None, n_jobs=None,
                chunksize=None, itr_desc='', verbose=True):
    """Run ``func`` over ``itr`` in a multiprocessing pool and fold each
    result into ``var_to_assign`` via ``func_assign``.

    Parameters
    ----------
    func : callable
        Worker function applied to each item of ``itr``.
    func_assign : callable
        ``func_assign(result, var_to_assign)`` stores one result.
    var_to_assign : object
        Mutable container receiving the results.
    itr : iterable
        Items to process.
    len_itr : int, optional
        Length of ``itr`` (used for progress display and chunk sizing).
    init_worker, glbv : callable, tuple, optional
        Pool initializer and its arguments; used to share large read-only
        data with worker processes via module-level globals.
    method : str, optional
        Only ``'imap_unordered'`` is implemented; any other value is a no-op.
    n_jobs : int, optional
        Number of worker processes; defaults to ``cpu_count()``.
    chunksize : int, optional
        Chunk size for ``imap_unordered``; derived from ``len_itr`` when None.
    itr_desc : str
        Progress-bar description.
    verbose : int or bool
        Progress is shown when ``verbose >= 2``.
    """
    if method == 'imap_unordered':
        # Fix: use `is None` (PEP 8) instead of `== None`, and collapse the
        # two previously duplicated branches — they differed only in whether
        # the pool received an initializer for shared globals.
        if n_jobs is None:
            n_jobs = multiprocessing.cpu_count()
        pool_kwargs = {'processes': n_jobs}
        if glbv:  # global variables required: share them via the initializer.
            pool_kwargs['initializer'] = init_worker
            pool_kwargs['initargs'] = glbv
        with Pool(**pool_kwargs) as pool:
            if chunksize is None:
                if len_itr < 100 * n_jobs:
                    chunksize = int(len_itr / n_jobs) + 1
                else:
                    chunksize = 100
            iterator = get_iters(pool.imap_unordered(func, itr, chunksize),
                                 desc=itr_desc, file=sys.stdout,
                                 length=len_itr, verbose=(verbose >= 2))
            for result in iterator:
                func_assign(result, var_to_assign)
            pool.close()
            pool.join()
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute shortest-path kernels between ``g1`` and each
    graph in ``g_list``.
    """
    self._all_graphs_have_edges([g1] + g_list)

    # Replace every graph by its shortest-path graph.
    g1 = getSPGraph(g1, edge_weight=self._edge_weight)
    sp_iter = get_iters(g_list, desc='getting sp graphs', file=sys.stdout,
                        verbose=(self._verbose >= 2))
    g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in sp_iter]

    # One kernel value per graph in g_list.
    kernel_list = [None] * len(g_list)
    k_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                       file=sys.stdout, length=len(g_list),
                       verbose=(self._verbose >= 2))
    for idx in k_iter:
        kernel_list[idx] = self._sp_do(g1, g_list[idx])

    return kernel_list
def _compute_gm_imap_unordered(self): self._all_graphs_have_edges(self._graphs) # get shortest path graph of each graph. pool = Pool(self._n_jobs) get_sp_graphs_fun = self._wrapper_get_sp_graphs itr = zip(self._graphs, range(0, len(self._graphs))) if len(self._graphs) < 100 * self._n_jobs: chunksize = int(len(self._graphs) / self._n_jobs) + 1 else: chunksize = 100 iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), desc='getting sp graphs', file=sys.stdout, length=len(self._graphs), verbose=(self._verbose >= 2)) for i, g in iterator: self._graphs[i] = g pool.close() pool.join() # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) def init_worker(gs_toshare): global G_gs G_gs = gs_toshare do_fun = self._wrapper_sp_do parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) return gram_matrix
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute common-walk kernels between ``g1`` and each graph
    in ``g_list``.

    Undirected graphs are first converted to directed form; the kernel is
    then computed with the exponential or geometric variant depending on
    ``self._compute_method``.
    """
    self._check_graphs(g_list + [g1])
    self._add_dummy_labels(g_list + [g1])
    if not self._ds_infos['directed']:  # convert
        g1 = g1.to_directed()
        g_list = [G.to_directed() for G in g_list]

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    # Consistency fix: every sibling method calls get_iters unconditionally
    # with verbose=(self._verbose >= 2); the previous explicit
    # `if self._verbose >= 2` branch around it was redundant.
    iterator = get_iters(range(len(g_list)), desc='Computing kernels',
                         file=sys.stdout, length=len(g_list),
                         verbose=(self._verbose >= 2))

    # direct product graph method - exponential
    if self._compute_method == 'exp':
        for i in iterator:
            kernel = self._kernel_do_exp(g1, g_list[i], self._weight)
            kernel_list[i] = kernel
    # direct product graph method - geometric
    elif self._compute_method == 'geo':
        for i in iterator:
            kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
            kernel_list[i] = kernel

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list): self._all_graphs_have_edges([g1] + g_list) # get shortest path graphs of g1 and each graph in g_list. g1 = getSPGraph(g1, edge_weight=self._edge_weight) pool = Pool(self._n_jobs) get_sp_graphs_fun = self._wrapper_get_sp_graphs itr = zip(g_list, range(0, len(g_list))) if len(g_list) < 100 * self._n_jobs: chunksize = int(len(g_list) / self._n_jobs) + 1 else: chunksize = 100 iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), desc='getting sp graphs', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) for i, g in iterator: g_list[i] = g pool.close() pool.join() # compute Gram matrix. kernel_list = [None] * len(g_list) def init_worker(g1_toshare, gl_toshare): global G_g1, G_gl G_g1 = g1_toshare G_gl = gl_toshare do_fun = self._wrapper_kernel_list_do def func_assign(result, var_to_assign): var_to_assign[result[0]] = result[1] itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list
def _compute_gm_series(self):
    """Serially compute the common-walk-kernel Gram matrix over
    ``self._graphs``.

    Undirected graphs are converted to directed form first; the kernel
    variant (exponential or geometric) follows ``self._compute_method``.
    """
    self._check_graphs(self._graphs)
    self._add_dummy_labels(self._graphs)
    if not self._ds_infos['directed']:  # convert
        self._graphs = [G.to_directed() for G in self._graphs]

    n = len(self._graphs)
    gram_matrix = np.zeros((n, n))

    from itertools import combinations_with_replacement
    pairs = combinations_with_replacement(range(0, n), 2)
    len_itr = int(n * (n + 1) / 2)
    pair_iter = get_iters(pairs, desc='Computing kernels', file=sys.stdout,
                          length=len_itr, verbose=(self._verbose >= 2))

    # Pick the kernel variant once, outside the pair loop.
    if self._compute_method == 'exp':
        kernel_fun = self._kernel_do_exp    # direct product graph - exponential
    elif self._compute_method == 'geo':
        kernel_fun = self._kernel_do_geo    # direct product graph - geometric
    else:
        kernel_fun = None                   # unknown method: leave matrix zero

    if kernel_fun is not None:
        for i, j in pair_iter:
            k = kernel_fun(self._graphs[i], self._graphs[j], self._weight)
            gram_matrix[i][j] = k
            gram_matrix[j][i] = k

    return gram_matrix
def _compute_gm_imap_unordered(self):
    """Compute the random-walk-kernel Gram matrix in parallel.

    Only the uniform-``q`` / uniform-``p`` case is implemented; the other
    branches are placeholders (@todo) and return an all-zero matrix.
    """
    self._check_edge_weight(self._graphs, self._verbose)
    self._check_graphs(self._graphs)
    if self._verbose >= 2:
        import warnings
        warnings.warn('All labels are ignored.')

    # compute Gram matrix.
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    if self._q is None:
        # don't normalize adjacency matrices if q is a uniform vector. Note
        # A_wave_list actually contains the transposes of the adjacency matrices.
        iterator = get_iters(self._graphs, desc='compute adjacency matrices',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        A_wave_list = [
            nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
            for G in iterator
        ]  # @todo: parallel?

        if self._p is None:  # p is uniform distribution as default.
            def init_worker(A_wave_list_toshare):
                # Share the adjacency matrices with worker processes.
                global G_A_wave_list
                G_A_wave_list = A_wave_list_toshare

            do_fun = self._wrapper_kernel_do
            parallel_gm(do_fun, gram_matrix, self._graphs,
                        init_worker=init_worker, glbv=(A_wave_list,),
                        n_jobs=self._n_jobs, verbose=self._verbose)
        else:  # @todo
            pass
    else:  # @todo
        pass

    return gram_matrix
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute structural shortest-path kernels between ``g1`` and each
    graph in ``g_list`` in parallel using ``imap_unordered``.
    """
    # get shortest paths of g1 and each graph in g_list.
    sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
    splist = [None] * len(g_list)
    pool = Pool(self._n_jobs)
    itr = zip(g_list, range(0, len(g_list)))
    if len(g_list) < 100 * self._n_jobs:
        chunksize = int(len(g_list) / self._n_jobs) + 1
    else:
        chunksize = 100
    # get shortest path graphs of g_list
    if self._compute_method == 'trie':
        get_sps_fun = self._wrapper_get_sps_trie
    else:
        get_sps_fun = self._wrapper_get_sps_naive
    iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize),
                         desc='getting shortest paths', file=sys.stdout,
                         length=len(g_list), verbose=(self._verbose >= 2))
    # Results arrive out of order; each carries its index i.
    for i, sp in iterator:
        splist[i] = sp
    pool.close()
    pool.join()

    # compute kernel list (comment said "Gram matrix", but a list is filled).
    kernel_list = [None] * len(g_list)

    def init_worker(sp1_toshare, spl_toshare, g1_toshare, gl_toshare):
        # Share precomputed shortest paths and graphs with worker processes.
        global G_sp1, G_spl, G_g1, G_gl
        G_sp1 = sp1_toshare
        G_spl = spl_toshare
        G_g1 = g1_toshare
        G_gl = gl_toshare

    if self._compute_method == 'trie':
        do_fun = self._wrapper_ssp_do_trie
    else:
        # NOTE(review): naming differs from the trie branch — presumably this
        # wraps the naive ssp kernel; confirm against the wrapper definition.
        do_fun = self._wrapper_kernel_list_do

    def func_assign(result, var_to_assign):
        # result is an (index, kernel) pair.
        var_to_assign[result[0]] = result[1]

    itr = range(len(g_list))
    len_itr = len(g_list)
    parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                init_worker=init_worker, glbv=(sp1, splist, g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return kernel_list
def _compute_gm_series(self):
    """Serially compute the spectral-decomposition-kernel Gram matrix.

    Only the uniform-``q`` / uniform-``p`` case is implemented; the other
    branches are placeholders (@todo) and return an all-zero matrix.
    """
    self._check_edge_weight(self._graphs, self._verbose)
    self._check_graphs(self._graphs)
    if self._verbose >= 2:
        import warnings
        warnings.warn('All labels are ignored. Only works for undirected graphs.')

    # compute Gram matrix.
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    if self._q is None:
        # precompute the spectral decomposition of each graph.
        P_list = []  # eigenvector matrices
        D_list = []  # eigenvalue vectors
        iterator = get_iters(self._graphs, desc='spectral decompose',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        for G in iterator:
            # don't normalize adjacency matrices if q is a uniform vector. Note
            # A actually is the transpose of the adjacency matrix.
            A = nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
            ew, ev = np.linalg.eig(A)
            D_list.append(ew)
            P_list.append(ev)
        # P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?

        if self._p is None:  # p is uniform distribution as default.
            # Uniform start distribution: a (1, n) row vector per graph.
            q_T_list = [np.full((1, nx.number_of_nodes(G)),
                                1 / nx.number_of_nodes(G))
                        for G in self._graphs]
            # q_T_list = [q.T for q in q_list]

            from itertools import combinations_with_replacement
            itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
            len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
            iterator = get_iters(itr, desc='Computing kernels',
                                 file=sys.stdout, length=len_itr,
                                 verbose=(self._verbose >= 2))

            for i, j in iterator:
                kernel = self._kernel_do(q_T_list[i], q_T_list[j],
                                         P_list[i], P_list[j],
                                         D_list[i], D_list[j],
                                         self._weight, self._sub_kernel)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
        else:  # @todo
            pass
    else:  # @todo
        pass

    return gram_matrix
def _compute_gm_series(self):
    """Serially compute the structural-shortest-path-kernel Gram matrix
    over ``self._graphs``.
    """
    # Shortest paths of every graph, as tries or plain lists.
    use_trie = (self._compute_method == 'trie')
    sp_iter = get_iters(self._graphs, desc='getting sp graphs',
                        file=sys.stdout, verbose=(self._verbose >= 2))
    if use_trie:
        splist = [self._get_sps_as_trie(g) for g in sp_iter]
    else:
        splist = [
            get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])
            for g in sp_iter
        ]

    # Fill the symmetric Gram matrix over its upper triangle.
    n = len(self._graphs)
    gram_matrix = np.zeros((n, n))
    from itertools import combinations_with_replacement
    pairs = combinations_with_replacement(range(0, n), 2)
    len_itr = int(n * (n + 1) / 2)
    pair_iter = get_iters(pairs, desc='Computing kernels', file=sys.stdout,
                          length=len_itr, verbose=(self._verbose >= 2))
    kernel_fun = self._ssp_do_trie if use_trie else self._ssp_do_naive
    for i, j in pair_iter:
        k = kernel_fun(self._graphs[i], self._graphs[j], splist[i], splist[j])
        gram_matrix[i][j] = k
        gram_matrix[j][i] = k

    return gram_matrix
def _compute_gm_series(self):
    """Serially compute the walk-kernel Gram matrix over ``self._graphs``.

    Only the case where both ``p`` and ``q`` are uniform is implemented;
    otherwise an all-zero matrix is returned (@todo).
    """
    self._check_edge_weight(self._graphs, self._verbose)
    self._check_graphs(self._graphs)

    lmda = self._weight

    # Compute Gram matrix.
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    # Reindex nodes using consecutive integers for the convenience of kernel computation.
    iterator = get_iters(self._graphs, desc='Reindex vertices',
                         file=sys.stdout, verbose=(self._verbose >= 2))
    self._graphs = [
        nx.convert_node_labels_to_integers(g, first_label=0,
                                           label_attribute='label_orignal')
        for g in iterator
    ]

    if self._p is None and self._q is None:
        # p and q are uniform distributions as default.
        from itertools import combinations_with_replacement
        itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
        len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
        iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
                             length=len_itr, verbose=(self._verbose >= 2))
        for i, j in iterator:
            kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
            gram_matrix[i][j] = kernel
            gram_matrix[j][i] = kernel
    else:  # @todo
        pass

    return gram_matrix
def _compute_gm_series(self):
    """Serially compute the random-walk-kernel Gram matrix.

    Only the uniform-``q`` / uniform-``p`` case is implemented; the other
    branches are placeholders (@todo) and return an all-zero matrix.
    """
    self._check_edge_weight(self._graphs, self._verbose)
    self._check_graphs(self._graphs)
    if self._verbose >= 2:
        import warnings
        warnings.warn('All labels are ignored.')

    lmda = self._weight

    # compute Gram matrix.
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    if self._q is None:
        # don't normalize adjacency matrices if q is a uniform vector. Note
        # A_wave_list actually contains the transposes of the adjacency matrices.
        iterator = get_iters(self._graphs, desc='compute adjacency matrices',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        A_wave_list = [
            nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
            for G in iterator
        ]
        # (A normalized-adjacency variant existed here as commented-out code.)

        if self._p is None:  # p is uniform distribution as default.
            from itertools import combinations_with_replacement
            itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
            len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
            iterator = get_iters(itr, desc='Computing kernels',
                                 file=sys.stdout, length=len_itr,
                                 verbose=(self._verbose >= 2))
            for i, j in iterator:
                kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
        else:  # @todo
            pass
    else:  # @todo
        pass

    return gram_matrix
def _compute_gm_series(self):
    """Serially compute the shortest-path-kernel Gram matrix over
    ``self._graphs``.
    """
    self._all_graphs_have_edges(self._graphs)

    # Replace each graph by its shortest-path graph.
    sp_iter = get_iters(self._graphs, desc='getting sp graphs',
                        file=sys.stdout, verbose=(self._verbose >= 2))
    self._graphs = [getSPGraph(g, edge_weight=self._edge_weight)
                    for g in sp_iter]

    # Fill the symmetric Gram matrix over its upper triangle.
    n = len(self._graphs)
    gram_matrix = np.zeros((n, n))
    from itertools import combinations_with_replacement
    pairs = combinations_with_replacement(range(0, n), 2)
    len_itr = int(n * (n + 1) / 2)
    pair_iter = get_iters(pairs, desc='Computing kernels', length=len_itr,
                          file=sys.stdout, verbose=(self._verbose >= 2))
    for i, j in pair_iter:
        k = self._sp_do(self._graphs[i], self._graphs[j])
        gram_matrix[i][j] = k
        gram_matrix[j][i] = k

    return gram_matrix
def _compute_gm_series(self): self._all_graphs_have_edges(self._graphs) # get shortest path graph of each graph. iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2)) self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] results = load_results(self._file_name, self._fcsp) # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) iterator = get_iters(itr, desc='Computing kernels', length=len_itr, file=sys.stdout,verbose=(self._verbose >= 2)) time0 = time.time() for i, j in iterator: if i > results['i'] or (i == results['i'] and j > results['j']): data = self._sp_do_space(self._graphs[i], self._graphs[j]) if self._fcsp: results['nb_comparison'].append(data[0]) if data[1] != {}: results['vk_dict_mem'].append(estimate_vk_memory(data[1], nx.number_of_nodes(self._graphs[i]), nx.number_of_nodes(self._graphs[j]))) else: results['nb_comparison'].append(data) results['i'] = i results['j'] = j time1 = time.time() if time1 - time0 > 600: save_results(self._file_name, results) time0 = time1 compute_stats(self._file_name, results) return gram_matrix
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute marginalized kernels between ``g1`` and each graph in
    ``g_list`` in parallel using ``imap_unordered``.
    """
    self._add_dummy_labels(g_list + [g1])

    if self._remove_totters:
        g1 = untotterTransformation(
            g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
        pool = Pool(self._n_jobs)
        itr = range(0, len(g_list))
        if len(g_list) < 100 * self._n_jobs:
            chunksize = int(len(g_list) / self._n_jobs) + 1
        else:
            chunksize = 100
        remove_fun = self._wrapper_untotter
        iterator = get_iters(pool.imap_unordered(remove_fun, itr, chunksize),
                             desc='removing tottering', file=sys.stdout,
                             length=len(g_list), verbose=(self._verbose >= 2))
        # Results arrive out of order; each carries its index i.
        for i, g in iterator:
            g_list[i] = g
        pool.close()
        pool.join()

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    def init_worker(g1_toshare, g_list_toshare):
        # Share g1 and the graph list with worker processes.
        global G_g1, G_g_list
        G_g1 = g1_toshare
        G_g_list = g_list_toshare

    do_fun = self._wrapper_kernel_list_do

    def func_assign(result, var_to_assign):
        # result is an (index, kernel) pair.
        var_to_assign[result[0]] = result[1]

    itr = range(len(g_list))
    len_itr = len(g_list)
    parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                init_worker=init_worker, glbv=(g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return kernel_list
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute spectral-decomposition kernels between ``g1`` and
    each graph in ``g_list``.

    Only the uniform-``q`` / uniform-``p`` case is implemented; the other
    branches are placeholders (@todo).
    """
    self._check_edge_weight(g_list + [g1], self._verbose)
    self._check_graphs(g_list + [g1])
    if self._verbose >= 2:
        import warnings
        warnings.warn('All labels are ignored. Only works for undirected graphs.')

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    if self._q is None:
        # precompute the spectral decomposition of each graph.
        A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
        D1, P1 = np.linalg.eig(A1)
        P_list = []  # eigenvector matrices
        D_list = []  # eigenvalue vectors
        iterator = get_iters(g_list, desc='spectral decompose',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        for G in iterator:
            # don't normalize adjacency matrices if q is a uniform vector. Note
            # A actually is the transpose of the adjacency matrix.
            A = nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
            ew, ev = np.linalg.eig(A)
            D_list.append(ew)
            P_list.append(ev)

        if self._p is None:  # p is uniform distribution as default.
            # Fix: q_T1 was the scalar `1 / n`, but _kernel_do receives
            # (1, n) row vectors everywhere else (see q_T_list below and the
            # Gram-matrix sibling method). Build the row vector so both
            # start-distribution operands have consistent shape.
            q_T1 = np.full((1, nx.number_of_nodes(g1)),
                           1 / nx.number_of_nodes(g1))
            q_T_list = [np.full((1, nx.number_of_nodes(G)),
                                1 / nx.number_of_nodes(G)) for G in g_list]
            iterator = get_iters(range(len(g_list)), desc='Computing kernels',
                                 file=sys.stdout, length=len(g_list),
                                 verbose=(self._verbose >= 2))
            for i in iterator:
                kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i],
                                         D1, D_list[i], self._weight,
                                         self._sub_kernel)
                kernel_list[i] = kernel
        else:  # @todo
            pass
    else:  # @todo
        pass

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute walk kernels between ``g1`` and each graph in ``g_list`` in
    parallel using ``imap_unordered``.

    Only the case where both ``p`` and ``q`` are uniform is implemented;
    otherwise the list keeps its ``None`` entries (@todo).
    """
    self._check_edge_weight(g_list + [g1], self._verbose)
    self._check_graphs(g_list + [g1])

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    # Reindex nodes using consecutive integers for the convenience of kernel computation.
    g1 = nx.convert_node_labels_to_integers(
        g1, first_label=0, label_attribute='label_orignal')  # @todo: parallel this.
    iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout,
                         verbose=(self._verbose >= 2))
    g_list = [
        nx.convert_node_labels_to_integers(g, first_label=0,
                                           label_attribute='label_orignal')
        for g in iterator
    ]

    if self._p is None and self._q is None:
        # p and q are uniform distributions as default.
        def init_worker(g1_toshare, g_list_toshare):
            # Share g1 and the graph list with worker processes.
            global G_g1, G_g_list
            G_g1 = g1_toshare
            G_g_list = g_list_toshare

        do_fun = self._wrapper_kernel_list_do

        def func_assign(result, var_to_assign):
            # result is an (index, kernel) pair.
            var_to_assign[result[0]] = result[1]

        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                    init_worker=init_worker, glbv=(g1, g_list),
                    method='imap_unordered', n_jobs=self._n_jobs,
                    itr_desc='Computing kernels', verbose=self._verbose)
    else:  # @todo
        pass

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list): self._add_dummy_labels(g_list + [g1]) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. canonkeys_1 = self._get_canonkeys(g1) canonkeys_list = [[] for _ in range(len(g_list))] pool = Pool(self._n_jobs) itr = zip(g_list, range(0, len(g_list))) if len(g_list) < 100 * self._n_jobs: chunksize = int(len(g_list) / self._n_jobs) + 1 else: chunksize = 100 get_fun = self._wrapper_get_canonkeys iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), desc='getting canonkeys', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) for i, ck in iterator: canonkeys_list[i] = ck pool.close() pool.join() # compute kernel list. kernel_list = [None] * len(g_list) def init_worker(ck_1_toshare, ck_list_toshare): global G_ck_1, G_ck_list G_ck_1 = ck_1_toshare G_ck_list = ck_list_toshare do_fun = self._wrapper_kernel_list_do def func_assign(result, var_to_assign): var_to_assign[result[0]] = result[1] itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute path kernels between ``g1`` and each graph in
    ``g_list``.
    """
    self._add_dummy_labels(g_list + [g1])

    ps_iter = get_iters(g_list, desc='getting paths', file=sys.stdout,
                        verbose=(self._verbose >= 2))
    k_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                       file=sys.stdout, length=len(g_list),
                       verbose=(self._verbose >= 2))
    kernel_list = [None] * len(g_list)

    # Extract paths once per graph, then apply the matching kernel.
    if self._compute_method == 'trie':
        paths_g1 = self._find_all_path_as_trie(g1)
        paths_others = [self._find_all_path_as_trie(g) for g in ps_iter]
        kernel_fun = self._kernel_do_trie
    else:
        paths_g1 = self._find_all_paths_until_length(g1)
        paths_others = [self._find_all_paths_until_length(g) for g in ps_iter]
        kernel_fun = self._kernel_do_naive

    for idx in k_iter:
        kernel_list[idx] = kernel_fun(paths_g1, paths_others[idx])

    return kernel_list
def _compute_kernel_list_series(self, g1, g_list):
    """Serially compute walk kernels between ``g1`` and each graph in
    ``g_list``.

    Only the case where both ``p`` and ``q`` are uniform is implemented;
    otherwise the list keeps its ``None`` entries (@todo).
    """
    self._check_edge_weight(g_list + [g1], self._verbose)
    self._check_graphs(g_list + [g1])
    lmda = self._weight

    kernel_list = [None] * len(g_list)

    # Reindex nodes using consecutive integers for the convenience of
    # kernel computation.
    g1 = nx.convert_node_labels_to_integers(
        g1, first_label=0, label_attribute='label_orignal')
    reindex_iter = get_iters(g_list, desc='Reindex vertices', file=sys.stdout,
                             verbose=(self._verbose >= 2))
    g_list = [
        nx.convert_node_labels_to_integers(g, first_label=0,
                                           label_attribute='label_orignal')
        for g in reindex_iter
    ]

    if self._p is None and self._q is None:
        # p and q are uniform distributions as default.
        k_iter = get_iters(range(len(g_list)), desc='Computing kernels',
                           file=sys.stdout, length=len(g_list),
                           verbose=(self._verbose >= 2))
        for idx in k_iter:
            kernel_list[idx] = self._kernel_do(g1, g_list[idx], lmda)
    else:  # @todo
        pass

    return kernel_list
def _compute_gm_imap_unordered(self): self._add_dummy_labels(self._graphs) # get all paths of all graphs before computing kernels to save time, # but this may cost a lot of memory for large datasets. pool = Pool(self._n_jobs) itr = zip(self._graphs, range(0, len(self._graphs))) if len(self._graphs) < 100 * self._n_jobs: chunksize = int(len(self._graphs) / self._n_jobs) + 1 else: chunksize = 100 all_paths = [[] for _ in range(len(self._graphs))] if self._compute_method == 'trie' and self._k_func is not None: get_ps_fun = self._wrapper_find_all_path_as_trie elif self._compute_method != 'trie' and self._k_func is not None: get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) else: get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize), desc='getting paths', file=sys.stdout, length=len(self._graphs), verbose=(self._verbose >= 2)) for i, ps in iterator: all_paths[i] = ps pool.close() pool.join() # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) if self._compute_method == 'trie' and self._k_func is not None: def init_worker(trie_toshare): global G_trie G_trie = trie_toshare do_fun = self._wrapper_kernel_do_trie elif self._compute_method != 'trie' and self._k_func is not None: def init_worker(plist_toshare): global G_plist G_plist = plist_toshare do_fun = self._wrapper_kernel_do_naive else: def init_worker(plist_toshare): global G_plist G_plist = plist_toshare do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this? parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose) return gram_matrix
def _compute_kernel_list_imap_unordered(self, g1, g_list): self._add_dummy_labels(g_list + [g1]) # get all paths of all graphs before computing kernels to save time, # but this may cost a lot of memory for large datasets. pool = Pool(self._n_jobs) itr = zip(g_list, range(0, len(g_list))) if len(g_list) < 100 * self._n_jobs: chunksize = int(len(g_list) / self._n_jobs) + 1 else: chunksize = 100 paths_g_list = [[] for _ in range(len(g_list))] if self._compute_method == 'trie' and self._k_func is not None: paths_g1 = self._find_all_path_as_trie(g1) get_ps_fun = self._wrapper_find_all_path_as_trie elif self._compute_method != 'trie' and self._k_func is not None: paths_g1 = self._find_all_paths_until_length(g1) get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) else: paths_g1 = self._find_all_paths_until_length(g1) get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize), desc='getting paths', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) for i, ps in iterator: paths_g_list[i] = ps pool.close() pool.join() # compute kernel list. kernel_list = [None] * len(g_list) def init_worker(p1_toshare, plist_toshare): global G_p1, G_plist G_p1 = p1_toshare G_plist = plist_toshare do_fun = self._wrapper_kernel_list_do def func_assign(result, var_to_assign): var_to_assign[result[0]] = result[1] itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list