def _compute_kernel_list_series(self, g1, g_list):
    self._all_graphs_have_edges([g1] + g_list)
    # get shortest path graphs of g1 and each graph in g_list.
    g1 = getSPGraph(g1, edge_weight=self._edge_weight)
    iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout,
                         verbose=(self._verbose >= 2))
    g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

    # compute kernel list.
    kernel_list = [None] * len(g_list)
    iterator = get_iters(range(len(g_list)), desc='Computing kernels',
                         file=sys.stdout, length=len(g_list),
                         verbose=(self._verbose >= 2))
    for i in iterator:
        kernel = self._sp_do(g1, g_list[i])
        kernel_list[i] = kernel

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    self._all_graphs_have_edges([g1] + g_list)
    # get shortest path graphs of g1 and each graph in g_list.
    g1 = getSPGraph(g1, edge_weight=self._edge_weight)
    pool = Pool(self._n_jobs)
    get_sp_graphs_fun = self._wrapper_get_sp_graphs
    itr = zip(g_list, range(0, len(g_list)))
    if len(g_list) < 100 * self._n_jobs:
        chunksize = int(len(g_list) / self._n_jobs) + 1
    else:
        chunksize = 100
    iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize),
                         desc='getting sp graphs', file=sys.stdout,
                         length=len(g_list), verbose=(self._verbose >= 2))
    for i, g in iterator:
        g_list[i] = g
    pool.close()
    pool.join()

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    def init_worker(g1_toshare, gl_toshare):
        global G_g1, G_gl
        G_g1 = g1_toshare
        G_gl = gl_toshare

    do_fun = self._wrapper_kernel_list_do

    def func_assign(result, var_to_assign):
        var_to_assign[result[0]] = result[1]

    itr = range(len(g_list))
    len_itr = len(g_list)
    parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                init_worker=init_worker, glbv=(g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return kernel_list
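# The parallel path above calls self._wrapper_kernel_list_do, which is not shown
# in this section. A minimal sketch, assuming the module-level globals G_g1 and
# G_gl populated by init_worker (names taken from the code above):
def _wrapper_kernel_list_do(self, itr):
    # compute the kernel between the shared g1 and the itr-th shared graph,
    # returning the index so func_assign can place the result.
    return itr, self._sp_do(G_g1, G_gl[itr])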
def _compute_gm_series(self):
    # get shortest path graph of each graph.
    if self._verbose >= 2:
        iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
    else:
        iterator = self._graphs
    self._graphs = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]

    # compute Gram matrix.
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    from itertools import combinations_with_replacement
    itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
    if self._verbose >= 2:
        iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
    else:
        iterator = itr
    for i, j in iterator:
        kernel = self.__sp_do(self._graphs[i], self._graphs[j])
        gram_matrix[i][j] = kernel
        gram_matrix[j][i] = kernel

    return gram_matrix
def _compute_kernel_list_series(self, g1, g_list):
    # get shortest path graphs of g1 and each graph in g_list.
    g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
    if self._verbose >= 2:
        iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
    else:
        iterator = g_list
    g_list = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]

    # compute kernel list.
    kernel_list = [None] * len(g_list)
    if self._verbose >= 2:
        iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
    else:
        iterator = range(len(g_list))
    for i in iterator:
        kernel = self.__sp_do(g1, g_list[i])
        kernel_list[i] = kernel

    return kernel_list
def _compute_gm_series(self):
    self._all_graphs_have_edges(self._graphs)
    # get shortest path graph of each graph.
    iterator = get_iters(self._graphs, desc='getting sp graphs',
                         file=sys.stdout, verbose=(self._verbose >= 2))
    self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

    # compute Gram matrix.
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    from itertools import combinations_with_replacement
    itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
    len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
    iterator = get_iters(itr, desc='Computing kernels', length=len_itr,
                         file=sys.stdout, verbose=(self._verbose >= 2))
    for i, j in iterator:
        kernel = self._sp_do(self._graphs[i], self._graphs[j])
        gram_matrix[i][j] = kernel
        gram_matrix[j][i] = kernel

    return gram_matrix
def _compute_gm_series(self):
    self._all_graphs_have_edges(self._graphs)
    # get shortest path graph of each graph.
    iterator = get_iters(self._graphs, desc='getting sp graphs',
                         file=sys.stdout, verbose=(self._verbose >= 2))
    self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

    results = load_results(self._file_name, self._fcsp)

    # compute Gram matrix. (This variant profiles the space behavior of
    # _sp_do_space; it records statistics rather than filling gram_matrix.)
    gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

    from itertools import combinations_with_replacement
    itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
    len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
    iterator = get_iters(itr, desc='Computing kernels', length=len_itr,
                         file=sys.stdout, verbose=(self._verbose >= 2))

    time0 = time.time()
    for i, j in iterator:
        # resume after the last saved pair (results['i'], results['j']).
        if i > results['i'] or (i == results['i'] and j > results['j']):
            data = self._sp_do_space(self._graphs[i], self._graphs[j])
            if self._fcsp:
                results['nb_comparison'].append(data[0])
                if data[1] != {}:
                    results['vk_dict_mem'].append(estimate_vk_memory(
                        data[1],
                        nx.number_of_nodes(self._graphs[i]),
                        nx.number_of_nodes(self._graphs[j])))
            else:
                results['nb_comparison'].append(data)
            results['i'] = i
            results['j'] = j

            time1 = time.time()
            if time1 - time0 > 600:
                # checkpoint partial results every 10 minutes.
                save_results(self._file_name, results)
                time0 = time1

    compute_stats(self._file_name, results)

    return gram_matrix
def wrapper_getSPGraph(weight, itr_item):
    g = itr_item[0]
    i = itr_item[1]
    return i, getSPGraph(g, edge_weight=weight)
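# A minimal usage sketch for wrapper_getSPGraph with multiprocessing; the helper
# parallel_sp_graphs is hypothetical, not part of the library:
from functools import partial
from multiprocessing import Pool

def parallel_sp_graphs(Gn, edge_weight, n_jobs):
    # transform each graph in Gn into its shortest path graph in parallel;
    # the returned index restores the original order despite imap_unordered.
    fun = partial(wrapper_getSPGraph, edge_weight)
    with Pool(n_jobs) as pool:
        for i, g in pool.imap_unordered(fun, zip(Gn, range(len(Gn))), 100):
            Gn[i] = g
    return Gn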
def _wrapper_get_sp_graphs(self, itr_item):
    g = itr_item[0]
    i = itr_item[1]
    return i, getSPGraph(g, edge_weight=self._edge_weight)
def _compute_single_kernel_series(self, g1, g2):
    self._all_graphs_have_edges([g1] + [g2])
    g1 = getSPGraph(g1, edge_weight=self._edge_weight)
    g2 = getSPGraph(g2, edge_weight=self._edge_weight)
    kernel = self._sp_do(g1, g2)
    return kernel
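# Conceptually, getSPGraph replaces a graph by its shortest path graph: a graph
# on the same nodes where each connected pair is joined by an edge whose 'cost'
# attribute is their shortest path distance. A self-contained sketch using
# NetworkX (the library's actual Floyd-based implementation may differ):
import networkx as nx

def sp_graph_sketch(G, edge_weight=None):
    S = nx.Graph()
    S.add_nodes_from(G.nodes(data=True))  # keep node attributes for _sp_do
    # all-pairs shortest path lengths; edge_weight=None treats edges as unit length.
    for u, lengths in nx.all_pairs_dijkstra_path_length(G, weight=edge_weight):
        for v, d in lengths.items():
            if u != v:
                S.add_edge(u, v, cost=d)
    return S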
def _wl_spkernel_do(Gn, node_label, edge_label, height):
    """Compute Weisfeiler-Lehman shortest path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are computed.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.
    height : int
        Subtree height.

    Returns
    -------
    gram_matrix : Numpy matrix
        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel
        between 2 graphs.
    """
    from gklearn.utils.utils import getSPGraph

    # init.
    height = int(height)
    gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel
    Gn = [getSPGraph(G, edge_weight=edge_label) for G in Gn]  # get shortest path graphs of Gn

    # initial for height = 0: count pairs of edges with equal non-zero cost
    # that connect the same pair of nodes.
    for i in range(0, len(Gn)):
        for j in range(i, len(Gn)):
            for e1 in Gn[i].edges(data=True):
                for e2 in Gn[j].edges(data=True):
                    if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                        gram_matrix[i][j] += 1
            gram_matrix[j][i] = gram_matrix[i][j]

    # iterate each height
    for h in range(1, height + 1):
        all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
        num_of_labels_occurred = 0  # number of distinct labels that have occurred as node labels at least once in all graphs
        for G in Gn:  # for each graph
            set_multisets = []
            for node in G.nodes(data=True):
                # Multiset-label determination.
                multiset = [G.nodes[neighbor][node_label] for neighbor in G[node[0]]]
                # sorting each multiset
                multiset.sort()
                # concatenate to a string, prefixed by the node's own label
                multiset = node[1][node_label] + ''.join(multiset)
                set_multisets.append(multiset)

            # label compression
            set_unique = list(set(set_multisets))  # set of unique multiset labels
            # a dictionary mapping original labels to new ones.
            set_compressed = {}
            # if a label occurred before, assign its former compressed label;
            # else assign the number of labels occurred + 1 as the compressed label
            for value in set_unique:
                if value in all_set_compressed.keys():
                    set_compressed.update({value: all_set_compressed[value]})
                else:
                    set_compressed.update({value: str(num_of_labels_occurred + 1)})
                    num_of_labels_occurred += 1

            all_set_compressed.update(set_compressed)

            # relabel nodes (set_multisets is in the same order as G.nodes).
            for idx, node in enumerate(G.nodes(data=True)):
                node[1][node_label] = set_compressed[set_multisets[idx]]

        # Compute subtree kernel with h iterations and add it to the final kernel
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            gram_matrix[i][j] += 1
                gram_matrix[j][i] = gram_matrix[i][j]

    return gram_matrix
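# A minimal usage sketch for _wl_spkernel_do on toy graphs; the attribute names
# 'atom' and 'bond' are hypothetical placeholders, not library conventions:
import networkx as nx

G1 = nx.cycle_graph(4)
G2 = nx.path_graph(4)
for G in (G1, G2):
    # WL relabeling concatenates string labels, so labels are set as strings.
    nx.set_node_attributes(G, {n: str(n % 2) for n in G.nodes}, 'atom')
    nx.set_edge_attributes(G, 1.0, 'bond')

K = _wl_spkernel_do([G1, G2], node_label='atom', edge_label='bond', height=2)
print(K)  # 2 x 2 symmetric kernel matrix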
def _compute_single_kernel_series(self, g1, g2):
    g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
    g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
    kernel = self.__sp_do(g1, g2)
    return kernel