def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, gkernel,
                 epsilon=0.001, InitIAMWithAllDk=False,
                 params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, 'ite_max': 50,
                             'epsilon': 0.001, 'removeNodes': True,
                             'connected': False},
                 params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1',
                             'method': 'IPFP', 'edit_cost_constant': [],
                             'stabilizer': 'min', 'repeat': 50}):
    """This function constructs graph pre-image by the iterative pre-image
    framework in reference [1], algorithm 1, where the step of generating new
    graphs randomly is replaced by the IAM algorithm in reference [2].

    notes
    -----
    Every time a set of n better graphs is acquired, their distances in kernel
    space are compared with the k nearest ones, and the k nearest distances
    from the k+n distances will be used as the new ones.
    """
    # compute k nearest neighbors of phi in DN.
    dis_all = []  # distance between g_star and each graph.
    term3 = 0
    for i1, a1 in enumerate(alpha):
        for i2, a2 in enumerate(alpha):
            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances',
                      file=sys.stdout):
        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
        dis_all.append(dtemp)

    # sort
    sort_idx = np.argsort(dis_all)
    dis_k = [dis_all[idis] for idis in sort_idx[0:k]]  # the k shortest distances
    nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
    ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]]  # the nearest neighbors of phi in DN
    if dis_k[0] == 0:  # the exact pre-image.
        print('The exact pre-image is found from the input dataset.')
        return 0, ghat_list, 0, 0, 0  # match the arity of the final return.
    dhat = dis_k[0]  # the nearest distance
#    for g in ghat_list:
#        draw_Letter_graph(g)
#        nx.draw_networkx(g)
#        plt.show()
#        print(g.nodes(data=True))
#        print(g.edges(data=True))
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
#    for gi in Gk:
#        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
##        nx.draw_networkx(gi)
#        plt.show()
##        draw_Letter_graph(g)
#        print(gi.nodes(data=True))
#        print(gi.edges(data=True))

#    i = 1
    r = 0
    itr_total = 0
    dis_of_each_itr = [dhat]
    found = False
    nb_updated = 0
    nb_updated_k = 0
    while r < r_max:  # and not found: # @todo: if not found? # and np.abs(old_dis - cur_dis) > epsilon:
        print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
        print('Current preimage iteration =', r)
        print('Total preimage iteration =', itr_total, '\n')
        found = False
        Gn_nearest_median = [g.copy() for g in Gk]
        if InitIAMWithAllDk:  # each graph in D_k is used to initialize IAM.
            ghat_new_list = []
            for g_tmp in Gk:
                Gn_nearest_init = [g_tmp.copy()]
                ghat_new_list_tmp, _, _ = iam_upgraded(Gn_nearest_median,
                    Gn_nearest_init, params_ged=params_ged, **params_iam)
                ghat_new_list += ghat_new_list_tmp
        else:  # only the best graph in D_k is used to initialize IAM.
            Gn_nearest_init = [g.copy() for g in Gk]
            ghat_new_list, _, _ = iam_upgraded(Gn_nearest_median,
                Gn_nearest_init, params_ged=params_ged, **params_iam)

#        for g in g_tmp_list:
#            nx.draw_networkx(g)
#            plt.show()
#            draw_Letter_graph(g)
#            print(g.nodes(data=True))
#            print(g.edges(data=True))

        # compute distance between \psi and the new generated graphs.
        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
        dhat_new_list = []
        for idx, g_tmp in enumerate(ghat_new_list):
            # @todo: the term3 below could use the one at the beginning of the function.
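            # Index bookkeeping (note for readers): `knew` is the Gram matrix
            # over ghat_new_list + Gn_median, so the newly generated graphs
            # occupy rows 0 .. len(ghat_new_list) - 1 and the median graphs
            # occupy the rows after them; the range below selects the median
            # columns for the cross terms of the distance.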
            dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
                len(ghat_new_list) + len(Gn_median) + 1), alpha, knew,
                withterm3=False))

        for idx_g, ghat_new in enumerate(ghat_new_list):
            dhat_new = dhat_new_list[idx_g]

            # if the new distance is smaller than the max of D_k.
            if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                # check if the new distance is the same as one in D_k.
                is_duplicate = False
                for dis_tmp in dis_k[1:-1]:
                    if np.abs(dhat_new - dis_tmp) < epsilon:
                        is_duplicate = True
                        print('IAM: duplicate k nearest graph generated.')
                        break
                if not is_duplicate:
                    if np.abs(dhat_new - dhat) < epsilon:
                        print('IAM: I am equal!')
#                        dhat = dhat_new
#                        ghat_list = [ghat_new.copy()]
                    else:
                        print('IAM: we got better k nearest neighbors!')
                        nb_updated_k += 1
                        print('the k nearest neighbors are updated',
                              nb_updated_k, 'times.')

                        dis_k = [dhat_new] + dis_k[0:k - 1]  # add the new nearest distance.
                        Gk = [ghat_new.copy()] + Gk[0:k - 1]  # add the corresponding graph.
                        sort_idx = np.argsort(dis_k)
                        dis_k = [dis_k[idx] for idx in sort_idx[0:k]]  # the new k nearest distances.
                        Gk = [Gk[idx] for idx in sort_idx[0:k]]
                        if dhat_new < dhat:
                            print('IAM: I have smaller distance!')
                            print(str(dhat) + '->' + str(dhat_new))
                            dhat = dhat_new
                            ghat_list = [Gk[0].copy()]
                            r = 0
                            nb_updated += 1
                            print('the graph is updated', nb_updated, 'times.')

                            nx.draw(Gk[0],
                                    labels=nx.get_node_attributes(Gk[0], 'atom'),
                                    with_labels=True)
##                            plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
                            plt.show()

                        found = True
        if not found:
            r += 1

        dis_of_each_itr.append(dhat)
        itr_total += 1
        print('\nthe k shortest distances are', dis_k)
        print('the shortest distances for previous iterations are',
              dis_of_each_itr)

    print('\n\nthe graph is updated', nb_updated, 'times.')
    print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
    print('distances in kernel space:', dis_of_each_itr, '\n')

    return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
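
# For reference, a minimal sketch of the kernel-space distance that
# `dis_gstar` is assumed to compute (the module's actual implementation may
# differ in details; this illustrative copy only documents the formula
# d(g, psi)^2 = k(g, g) - 2 * sum_i alpha_i * k(g, g_i)
# + sum_{i,j} alpha_i * alpha_j * k(g_i, g_j), where the constant last term
# can be precomputed once and passed in as `term3` with withterm3=True):
def _dis_gstar_sketch(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    import numpy as np
    term1 = Kmatrix[idx_g, idx_g]
    term2 = 0
    for i, a in enumerate(alpha):
        term2 += a * Kmatrix[idx_g, idx_gi[i]]
    term2 *= 2
    if not withterm3:  # compute the constant term here instead of reusing it.
        term3 = 0
        for i1, a1 in enumerate(alpha):
            for i2, a2 in enumerate(alpha):
                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    return np.sqrt(term1 - term2 + term3)
# Note: iteration is driven by `alpha`, so callers may pass one extra index in
# `idx_gi` (as the ranges above do) without affecting the result.
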
def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k,
                               fit_method,
                               graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/',
                               edit_costs=None, group_min=None,
                               dataset='monoterpenoides', cost='CONSTANT',
                               parallel=True):
    dataset = dataset.lower()

#    # compute distances in kernel space.
#    dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
#                                              Kmatrix=None, gkernel=gkernel)
#    # ged.
#    gmfile = np.load('results/test_k_closest_graphs/ged_mat.fit_on_whole_dataset.with_medians.gm.npz')
#    ged_mat = gmfile['ged_mat']
#    dis_mat = ged_mat[0:len(Gn), 0:len(Gn)]

#    # choose k closest graphs
#    time0 = time.time()
#    sod_ks_min, group_min = get_closest_k_graphs(dis_mat, k, parallel)
#    time_spent = time.time() - time0
#    print('closest graphs:', sod_ks_min, group_min)
#    print('time spent:', time_spent)
#    group_min = (12, 13, 22, 29)  # closest w.r.t path kernel
#    group_min = (77, 85, 160, 171)  # closest w.r.t ged
#    group_min = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)  # closest w.r.t treelet kernel
    Gn_median = [Gn[g].copy() for g in group_min]

    # fit edit costs.
    if fit_method == 'random':  # random
        if cost == 'LETTER':
            edit_cost_constant = random.sample(range(1, 10), 3)
            edit_cost_constant = [item * 0.1 for item in edit_cost_constant]
        elif cost == 'LETTER2':
            random.seed(time.time())
            edit_cost_constant = random.sample(range(1, 10), 5)
#            edit_cost_constant = [item * 0.1 for item in edit_cost_constant]
        else:
            edit_cost_constant = random.sample(range(1, 10), 6)
        print('edit costs used:', edit_cost_constant)
    elif fit_method == 'expert':  # expert
        edit_cost_constant = [3, 3, 1, 3, 3, 1]
    elif fit_method == 'k-graphs':
        itr_max = 6
        if cost == 'LETTER':
            init_costs = [0.9, 1.7, 0.75]
        elif cost == 'LETTER2':
            init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
        else:
            init_costs = [3, 3, 1, 3, 3, 1]
        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
        params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP',
                      'algo_options': algo_options, 'stabilizer': None}
        # fit on the k-graph subset.
        edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(
            Gn_median, node_label, edge_label, gkernel, itr_max,
            params_ged=params_ged, init_costs=init_costs, dataset=dataset,
            parallel=True)
    elif fit_method == 'whole-dataset':
        itr_max = 6
        if cost == 'LETTER':
            init_costs = [0.9, 1.7, 0.75]
        elif cost == 'LETTER2':
            init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
        else:
            init_costs = [3, 3, 1, 3, 3, 1]
        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
        params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP',
                      'algo_options': algo_options, 'stabilizer': None}
        # fit on the whole dataset.
        edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(
            Gn, node_label, edge_label, gkernel, itr_max,
            params_ged=params_ged, init_costs=init_costs, dataset=dataset,
            parallel=True)
    elif fit_method == 'precomputed':
        edit_cost_constant = edit_costs

    # compute set median and gen median using IAM (C++ through bash).
    group_fnames = [Gn[g].graph['filename'] for g in group_min]
    sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames,
        edit_cost_constant, cost=cost, graph_dir=graph_dir, dataset=dataset)

    # compute distances in kernel space.
    Gn_median = [Gn[g].copy() for g in group_min]
    set_median = loadGXL(fname_sm)
    gen_median = loadGXL(fname_gm)
#    print(gen_median.nodes(data=True))
#    print(gen_median.edges(data=True))
    if dataset == 'letter':
        for g in Gn_median:
            reform_attributes(g)
        reform_attributes(set_median)
        reform_attributes(gen_median)

    # compute distance in kernel space for set median.
    Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel,
                                None if dataset == 'letter' else 'chem',
                                None if dataset == 'letter' else 'valence',
                                False)
    dis_k_sm = dis_gstar(0, range(1, 1 + len(Gn_median)),
                         [1 / len(Gn_median)] * len(Gn_median), Kmatrix_sm,
                         withterm3=False)
#    print(gen_median.nodes(data=True))
#    print(gen_median.edges(data=True))
#    print(set_median.nodes(data=True))
#    print(set_median.edges(data=True))

    # compute distance in kernel space for generalized median.
    Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel,
                                None if dataset == 'letter' else 'chem',
                                None if dataset == 'letter' else 'valence',
                                False)
    dis_k_gm = dis_gstar(0, range(1, 1 + len(Gn_median)),
                         [1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm,
                         withterm3=False)

    # compute distance in kernel space for each graph in median set.
    dis_k_gi = []
    for idx in range(len(Gn_median)):
        dis_k_gi.append(dis_gstar(idx + 1, range(1, 1 + len(Gn_median)),
                                  [1 / len(Gn_median)] * len(Gn_median),
                                  Kmatrix_gm, withterm3=False))

    print('sod_sm:', sod_sm)
    print('sod_gm:', sod_gm)
    print('dis_k_sm:', dis_k_sm)
    print('dis_k_gm:', dis_k_gm)
    print('dis_k_gi:', dis_k_gi)
    idx_dis_k_gi_min = np.argmin(dis_k_gi)
    dis_k_gi_min = dis_k_gi[idx_dis_k_gi_min]
    print('index min dis_k_gi:', group_min[idx_dis_k_gi_min])
    print('min dis_k_gi:', dis_k_gi_min)

    return sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, \
        group_min[idx_dis_k_gi_min]
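
# A minimal usage sketch for `median_on_k_closest_graphs` (the dataset path
# and the index group below are illustrative assumptions, not values from the
# original experiments). Note that `iam_bash` reads each graph through its
# 'filename' attribute relative to `graph_dir`, so the default `graph_dir`
# must point at the dataset's GXL files:
def _example_median_on_k_closest_graphs():
    Gn, y_all = loadDataset('../datasets/monoterpenoides/dataset_10+.ds')
    sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_min = \
        median_on_k_closest_graphs(Gn, 'atom', 'bond_type', 'untilhpathkernel',
                                   k=4, fit_method='expert',
                                   group_min=(0, 1, 2, 3),
                                   dataset='monoterpenoides', cost='CONSTANT')
    print('SODs of set / generalized median:', sod_sm, sod_gm)
    print('kernel distances of set / generalized median:', dis_k_sm, dis_k_gm)
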
def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k,
                            r_max, l_max, gkernel, epsilon=0.001,
                            InitIAMWithAllDk=False, InitRandomWithAllDk=True,
                            params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
                                        'ite_max': 50, 'epsilon': 0.001,
                                        'removeNodes': True,
                                        'connected': False},
                            params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1',
                                        'method': 'IPFP',
                                        'edit_cost_constant': [],
                                        'stabilizer': 'min', 'repeat': 50}):
    """This function constructs graph pre-image by the iterative pre-image
    framework in reference [1], algorithm 1, where new graphs are generated
    randomly and by the IAM algorithm in reference [2].

    notes
    -----
    Every time a set of n better graphs is acquired, their distances in kernel
    space are compared with the k nearest ones, and the k nearest distances
    from the k+n distances will be used as the new ones.
    """
    Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
    # compute k nearest neighbors of phi in DN.
    dis_all = []  # distance between g_star and each graph.
    term3 = 0
    for i1, a1 in enumerate(alpha):
        for i2, a2 in enumerate(alpha):
            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances',
                      file=sys.stdout):
        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
        dis_all.append(dtemp)

    # sort
    sort_idx = np.argsort(dis_all)
    dis_k = [dis_all[idis] for idis in sort_idx[0:k]]  # the k shortest distances
    nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
    ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]]  # the nearest neighbors of psi in DN
    if dis_k[0] == 0:  # the exact pre-image.
        print('The exact pre-image is found from the input dataset.')
        return 0, ghat_list, 0, 0, 0, 0, 0  # match the arity of the final return.
    dhat = dis_k[0]  # the nearest distance
#    for g in ghat_list:
#        draw_Letter_graph(g)
#        nx.draw_networkx(g)
#        plt.show()
#        print(g.nodes(data=True))
#        print(g.edges(data=True))
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
#    for gi in Gk:
#        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
##        nx.draw_networkx(gi)
#        plt.show()
##        draw_Letter_graph(g)
#        print(gi.nodes(data=True))
#        print(gi.edges(data=True))

    r = 0
    itr_total = 0
    dis_of_each_itr = [dhat]
    nb_updated_iam = 0
    nb_updated_k_iam = 0
    nb_updated_random = 0
    nb_updated_k_random = 0
#    is_iam_duplicate = False
    while r < r_max:  # and not found: # @todo: if not found? # and np.abs(old_dis - cur_dis) > epsilon:
        print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
        print('Current preimage iteration =', r)
        print('Total preimage iteration =', itr_total, '\n')
        found_iam = False

        Gn_nearest_median = [g.copy() for g in Gk]
        if InitIAMWithAllDk:  # each graph in D_k is used to initialize IAM.
            ghat_new_list = []
            for g_tmp in Gk:
                Gn_nearest_init = [g_tmp.copy()]
                ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median,
                    Gn_nearest_init, params_ged=params_ged, **params_iam)
                ghat_new_list += ghat_new_list_tmp
        else:  # only the best graph in D_k is used to initialize IAM.
            Gn_nearest_init = [g.copy() for g in Gk]
            ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
                                            params_ged=params_ged, **params_iam)

#        for g in g_tmp_list:
#            nx.draw_networkx(g)
#            plt.show()
#            draw_Letter_graph(g)
#            print(g.nodes(data=True))
#            print(g.edges(data=True))

        # compute distance between \psi and the new generated graphs.
        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
        dhat_new_list = []
        for idx, g_tmp in enumerate(ghat_new_list):
            # @todo: the term3 below could use the one at the beginning of the function.
            dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
                len(ghat_new_list) + len(Gn_median) + 1), alpha, knew,
                withterm3=False))

        # find the new k nearest graphs.
        for idx_g, ghat_new in enumerate(ghat_new_list):
            dhat_new = dhat_new_list[idx_g]

            # if the new distance is smaller than the max of D_k.
            if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                # check if the new distance is the same as one in D_k.
                is_duplicate = False
                for dis_tmp in dis_k[1:-1]:
                    if np.abs(dhat_new - dis_tmp) < epsilon:
                        is_duplicate = True
                        print('IAM: duplicate k nearest graph generated.')
                        break
                if not is_duplicate:
                    if np.abs(dhat_new - dhat) < epsilon:
                        print('IAM: I am equal!')
#                        dhat = dhat_new
#                        ghat_list = [ghat_new.copy()]
                    else:
                        print('IAM: we got better k nearest neighbors!')
                        nb_updated_k_iam += 1
                        print('the k nearest neighbors are updated',
                              nb_updated_k_iam, 'times.')

                        dis_k = [dhat_new] + dis_k[0:k - 1]  # add the new nearest distance.
                        Gk = [ghat_new.copy()] + Gk[0:k - 1]  # add the corresponding graph.
                        sort_idx = np.argsort(dis_k)
                        dis_k = [dis_k[idx] for idx in sort_idx[0:k]]  # the new k nearest distances.
                        Gk = [Gk[idx] for idx in sort_idx[0:k]]
                        if dhat_new < dhat:
                            print('IAM: I have smaller distance!')
                            print(str(dhat) + '->' + str(dhat_new))
                            dhat = dhat_new
                            ghat_list = [Gk[0].copy()]
                            r = 0
                            nb_updated_iam += 1
                            print('the graph is updated by IAM',
                                  nb_updated_iam, 'times.')

                            nx.draw(Gk[0],
                                    labels=nx.get_node_attributes(Gk[0], 'atom'),
                                    with_labels=True)
##                            plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
                            plt.show()

                        found_iam = True

        # when new distance is not smaller than the max of D_k, use random generation.
        if not found_iam:
            print('Distance not better, switching to random generation now.')
            print(str(dhat) + '->' + str(dhat_new))

            if InitRandomWithAllDk:  # use all k nearest graphs as the initials.
                init_list = [g_init.copy() for g_init in Gk]
            else:  # use just the nearest graph as the initial.
                init_list = [Gk[0].copy()]

            # number of edges to be changed.
            if len(init_list) == 1:
                # @todo: what if the log is negative? how to choose alpha (scalar)? seems fdgs is always 1.
#                fdgs = dhat_new
                fdgs = nb_updated_random + 1
                if fdgs < 1:
                    fdgs = 1
                fdgs = int(np.ceil(np.log(fdgs)))
                if fdgs < 1:
                    fdgs += 1
#                fdgs = nb_updated_random + 1  # @todo:
                fdgs_list = [fdgs]
            else:
                # @todo: what if the log is negative? how to choose alpha (scalar)?
                fdgs_list = np.array(dis_k[:])
                if np.min(fdgs_list) < 1:
                    fdgs_list /= dis_k[0]
                fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
                if np.min(fdgs_list) < 1:
                    fdgs_list = np.array(fdgs_list) + 1

            l = 0
            found_random = False
            while l < l_max and not found_random:
                for idx_g, g_tmp in enumerate(init_list):
                    # add and delete edges.
                    ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
                    # @todo: should we use just half of the adjacency matrix for undirected graphs?
                    nb_vpairs = nx.number_of_nodes(ghat_new) * (nx.number_of_nodes(ghat_new) - 1)
                    np.random.seed()
                    # which edges to change.
                    # @todo: what if fdgs is bigger than nb_vpairs?
                    idx_change = random.sample(range(nb_vpairs),
                        fdgs_list[idx_g] if fdgs_list[idx_g] < nb_vpairs else nb_vpairs)
#                    idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
#                        (nx.number_of_nodes(gs) - 1), fdgs)
                    for item in idx_change:
                        node1 = int(item / (nx.number_of_nodes(ghat_new) - 1))
                        node2 = (item - node1 * (nx.number_of_nodes(ghat_new) - 1))
                        if node2 >= node1:  # skip the self pair.
                            node2 += 1
                        # @todo: is the randomness correct?
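                        # Decoding (note for readers): `item` indexes the
                        # n*(n-1) ordered node pairs without self pairs. E.g.
                        # for n = 4 (so n-1 = 3), item = 7 gives
                        # node1 = 7 // 3 = 2 and node2 = 7 - 2*3 = 1, kept as
                        # is since node2 < node1; item = 8 gives (2, 2), which
                        # the shift above turns into (2, 3).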
                        if not ghat_new.has_edge(node1, node2):
                            ghat_new.add_edge(node1, node2)
#                            nx.draw_networkx(gs)
#                            plt.show()
#                            nx.draw_networkx(ghat_new)
#                            plt.show()
                        else:
                            ghat_new.remove_edge(node1, node2)
#                            nx.draw_networkx(gs)
#                            plt.show()
#                            nx.draw_networkx(ghat_new)
#                            plt.show()
#                    nx.draw_networkx(ghat_new)
#                    plt.show()

                    # compute distance between \psi and the new generated graph.
                    knew = compute_kernel([ghat_new] + Gn_median, gkernel,
                                          verbose=False)
                    dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
                                         alpha, knew, withterm3=False)

                    # @todo: the new distance is smaller or also equal?
                    if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                        # check if the new distance is the same as one in D_k.
                        is_duplicate = False
                        for dis_tmp in dis_k[1:-1]:
                            if np.abs(dhat_new - dis_tmp) < epsilon:
                                is_duplicate = True
                                print('Random: duplicate k nearest graph generated.')
                                break
                        if not is_duplicate:
                            if np.abs(dhat_new - dhat) < epsilon:
                                print('Random: I am equal!')
#                                dhat = dhat_new
#                                ghat_list = [ghat_new.copy()]
                            else:
                                print('Random: we got better k nearest neighbors!')
                                print('l =', str(l))
                                nb_updated_k_random += 1
                                print('the k nearest neighbors are updated by random generation',
                                      nb_updated_k_random, 'times.')

                                dis_k = [dhat_new] + dis_k  # add the new nearest distances.
                                Gk = [ghat_new.copy()] + Gk  # add the corresponding graphs.
                                sort_idx = np.argsort(dis_k)
                                dis_k = [dis_k[idx] for idx in sort_idx[0:k]]  # the new k nearest distances.
                                Gk = [Gk[idx] for idx in sort_idx[0:k]]
                                if dhat_new < dhat:
                                    print('\nRandom: I am smaller!')
                                    print('l =', str(l))
                                    print(dhat, '->', dhat_new)
                                    dhat = dhat_new
                                    ghat_list = [ghat_new.copy()]
                                    r = 0
                                    nb_updated_random += 1
                                    print('the graph is updated by random generation',
                                          nb_updated_random, 'times.')

                                    nx.draw(ghat_new,
                                            labels=nx.get_node_attributes(ghat_new, 'atom'),
                                            with_labels=True)
##                                    plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
                                    plt.show()

                                    found_random = True
                                    break
                l += 1
            if not found_random:  # l == l_max:
                r += 1

        dis_of_each_itr.append(dhat)
        itr_total += 1
        print('\nthe k shortest distances are', dis_k)
        print('the shortest distances for previous iterations are',
              dis_of_each_itr)

    print('\n\nthe graph is updated by IAM', nb_updated_iam,
          'times, and by random generation', nb_updated_random, 'times.')
    print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
          'times, and by random generation', nb_updated_k_random, 'times.')
    print('distances in kernel space:', dis_of_each_itr, '\n')

    return dhat, ghat_list, dis_of_each_itr[-1], \
        nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random
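
# The edge-flip step above is easy to get wrong, so here is a standalone
# sketch that isolates it for testing. `_flip_random_vertex_pairs` is a
# hypothetical helper, not part of the original module; it mirrors the
# decoding used in `preimage_iam_random_mix` and `preimage_random`:
def _flip_random_vertex_pairs(g, nb_flips):
    import random
    import networkx as nx
    g = nx.convert_node_labels_to_integers(g.copy())
    n = nx.number_of_nodes(g)
    if n < 2:  # nothing to flip.
        return g
    nb_vpairs = n * (n - 1)  # ordered pairs, self pairs excluded.
    for item in random.sample(range(nb_vpairs), min(nb_flips, nb_vpairs)):
        node1 = item // (n - 1)
        node2 = item - node1 * (n - 1)
        if node2 >= node1:  # skip the self pair.
            node2 += 1
        if g.has_edge(node1, node2):
            g.remove_edge(node1, node2)
        else:
            g.add_edge(node1, node2)
    return g
# e.g. _flip_random_vertex_pairs(nx.path_graph(5), 2) returns a copy of the
# path graph with two vertex pairs toggled.
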
###############################################################################
# Old implementations.

#def gk_iam(Gn, alpha):
#    """This function constructs graph pre-image by the iterative pre-image
#    framework in reference [1], algorithm 1, where the step of generating new
#    graphs randomly is replaced by the IAM algorithm in reference [2].
#
#    notes
#    -----
#    Every time a better graph is acquired, the older one is replaced by it.
#    """
#    pass
#    # compute k nearest neighbors of phi in DN.
#    dis_list = []  # distance between g_star and each graph.
#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
#        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
#                k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
#                (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
#                k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
#        dis_list.append(dtemp)
#
#    # sort
#    sort_idx = np.argsort(dis_list)
#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
#    g0hat = Gn[sort_idx[0]]  # the nearest neighbor of phi in DN
#    if dis_gs[0] == 0:  # the exact pre-image.
#        print('The exact pre-image is found from the input dataset.')
#        return 0, g0hat
#    dhat = dis_gs[0]  # the nearest distance
#    Gk = [Gn[ig] for ig in sort_idx[0:k]]  # the k nearest neighbors
#    gihat_list = []
#
##    i = 1
#    r = 1
#    while r < r_max:
#        print('r =', r)
##        found = False
#        Gs_nearest = Gk + gihat_list
#        g_tmp = iam(Gs_nearest)
#
#        # compute distance between \psi and the new generated graph.
#        knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
#                                  p_quit=lmbda, n_iteration=20, remove_totters=False,
#                                  n_jobs=multiprocessing.cpu_count(), verbose=False)
#        dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
#               knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
#               (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
#               k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
#        if dnew <= dhat:  # the new distance is smaller
#            print('I am smaller!')
#            dhat = dnew
#            g_new = g_tmp.copy()  # found better graph.
#            gihat_list = [g_new]
#            dis_gs.append(dhat)
#            r = 0
#        else:
#            r += 1
#
#    ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
#
#    return dhat, ghat


#def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
#    """This function constructs graph pre-image by the iterative pre-image
#    framework in reference [1], algorithm 1, where the step of generating new
#    graphs randomly is replaced by the IAM algorithm in reference [2].
#
#    notes
#    -----
#    Every time a better graph is acquired, its distance in kernel space is
#    compared with the k nearest ones, and the k nearest distances from the k+1
#    distances will be used as the new ones.
#    """
#    # compute k nearest neighbors of phi in DN.
#    dis_list = []  # distance between g_star and each graph.
#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
#        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
##        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
##                k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
##                (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
##                k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
#        dis_list.append(dtemp)
#
#    # sort
#    sort_idx = np.argsort(dis_list)
#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]  # the k shortest distances
#    g0hat = Gn[sort_idx[0]]  # the nearest neighbor of phi in DN
#    if dis_gs[0] == 0:  # the exact pre-image.
#        print('The exact pre-image is found from the input dataset.')
#        return 0, g0hat
#    dhat = dis_gs[0]  # the nearest distance
#    ghat = g0hat.copy()
#    Gk = [Gn[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
#    for gi in Gk:
#        nx.draw_networkx(gi)
#        plt.show()
#        print(gi.nodes(data=True))
#        print(gi.edges(data=True))
#    Gs_nearest = Gk.copy()
##    gihat_list = []
#
##    i = 1
#    r = 1
#    while r < r_max:
#        print('r =', r)
##        found = False
##        Gs_nearest = Gk + gihat_list
##        g_tmp = iam(Gs_nearest)
#        g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest,
#                                                  c_ei=1, c_er=1, c_es=1)
#        nx.draw_networkx(g_tmp)
#        plt.show()
#        print(g_tmp.nodes(data=True))
#        print(g_tmp.edges(data=True))
#
#        # compute distance between \psi and the new generated graph.
#        gi_list = [Gn[i] for i in idx_gi]
#        knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
#        dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
#
##        dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
##               knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
##               alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
##               k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
#        if dnew <= dhat and g_tmp != ghat:  # the new distance is smaller
#            print('I am smaller!')
#            print(str(dhat) + '->' + str(dnew))
##            nx.draw_networkx(ghat)
##            plt.show()
##            print('->')
##            nx.draw_networkx(g_tmp)
##            plt.show()
#
#            dhat = dnew
#            g_new = g_tmp.copy()  # found better graph.
#            ghat = g_tmp.copy()
#            dis_gs.append(dhat)  # add the new nearest distance.
#            Gs_nearest.append(g_new)  # add the corresponding graph.
#            sort_idx = np.argsort(dis_gs)
#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]  # the new k nearest distances.
#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
#            r = 0
#        else:
#            r += 1
#
#    return dhat, ghat


#def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
#    """This function constructs graph pre-image by the iterative pre-image
#    framework in reference [1], algorithm 1, where the step of generating new
#    graphs randomly is replaced by the IAM algorithm in reference [2].
#
#    notes
#    -----
#    Every time a set of n better graphs is acquired, their distances in kernel
#    space are compared with the k nearest ones, and the k nearest distances
#    from the k+n distances will be used as the new ones.
#    """
#    Gn_median = [Gn[idx].copy() for idx in idx_gi]
#    # compute k nearest neighbors of phi in DN.
#    dis_list = []  # distance between g_star and each graph.
#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
#        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
##        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
##                k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
##                (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
##                k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
#        dis_list.append(dtemp)
#
#    # sort
#    sort_idx = np.argsort(dis_list)
#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]  # the k shortest distances
#    nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
#    g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]]  # the nearest neighbors of phi in DN
#    if dis_gs[0] == 0:  # the exact pre-image.
#        print('The exact pre-image is found from the input dataset.')
#        return 0, g0hat_list
#    dhat = dis_gs[0]  # the nearest distance
#    ghat_list = [g.copy() for g in g0hat_list]
#    for g in ghat_list:
#        nx.draw_networkx(g)
#        plt.show()
#        print(g.nodes(data=True))
#        print(g.edges(data=True))
#    Gk = [Gn[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
#    for gi in Gk:
#        nx.draw_networkx(gi)
#        plt.show()
#        print(gi.nodes(data=True))
#        print(gi.edges(data=True))
#    Gs_nearest = Gk.copy()
##    gihat_list = []
#
##    i = 1
#    r = 1
#    while r < r_max:
#        print('r =', r)
##        found = False
##        Gs_nearest = Gk + gihat_list
##        g_tmp = iam(Gs_nearest)
#        g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
#            Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
#        for g in g_tmp_list:
#            nx.draw_networkx(g)
#            plt.show()
#            print(g.nodes(data=True))
#            print(g.edges(data=True))
#
#        # compute distance between \psi and the new generated graphs.
#        gi_list = [Gn[i] for i in idx_gi]
#        knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
#        dnew_list = []
#        for idx, g_tmp in enumerate(g_tmp_list):
#            dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
#                len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
#
##        dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
##               knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
##               alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
##               k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
#
#        # find the new k nearest graphs.
#        dis_gs = dnew_list + dis_gs  # add the new nearest distances.
#        Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest  # add the corresponding graphs.
#        sort_idx = np.argsort(dis_gs)
#        if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
#            print('We got better k nearest neighbors! Hurray!')
#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]  # the new k nearest distances.
#            print(dis_gs[-1])
#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
#            nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
#            if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
#                print('I have smaller or equal distance!')
#                dhat = dis_gs[0]
#                print(str(dhat) + '->' + str(dhat))
#                idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
#                ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
#                for g in ghat_list:
#                    nx.draw_networkx(g)
#                    plt.show()
#                    print(g.nodes(data=True))
#                    print(g.edges(data=True))
#                r = 0
#        else:
#            r += 1
#
#    return dhat, ghat_list
def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l,
                    gkernel):
    Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
    # compute k nearest neighbors of phi in DN.
    dis_list = []  # distance between g_star and each graph.
    term3 = 0
    for i1, a1 in enumerate(alpha):
        for i2, a2 in enumerate(alpha):
            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances',
                      file=sys.stdout):
        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
        dis_list.append(dtemp)
#    print(np.max(dis_list))
#    print(np.min(dis_list))
#    print(np.min([item for item in dis_list if item != 0]))
#    print(np.mean(dis_list))

    # sort
    sort_idx = np.argsort(dis_list)
    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]  # the k shortest distances
    nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
    g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]]  # the nearest neighbors of phi in DN
    if dis_gs[0] == 0:  # the exact pre-image.
        print('The exact pre-image is found from the input dataset.')
        return 0, g0hat_list[0], 0
    dhat = dis_gs[0]  # the nearest distance
#    ghat_list = [g.copy() for g in g0hat_list]
#    for g in ghat_list:
#        draw_Letter_graph(g)
#        nx.draw_networkx(g)
#        plt.show()
#        print(g.nodes(data=True))
#        print(g.edges(data=True))
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
#    for gi in Gk:
##        nx.draw_networkx(gi)
##        plt.show()
#        draw_Letter_graph(g)
#        print(gi.nodes(data=True))
#        print(gi.edges(data=True))
    Gs_nearest = [g.copy() for g in Gk]
    gihat_list = []
    dihat_list = []

#    i = 1
    r = 0
#    sod_list = [dhat]
#    found = False
    dis_of_each_itr = [dhat]
    nb_updated = 0
    g_best = []
    while r < r_max:
        print('\nr =', r)
        print('itr for gk =', nb_updated, '\n')
        found = False
        dis_bests = dis_gs + dihat_list
        # @todo: what if the log is negative? how to choose alpha (scalar)?
        fdgs_list = np.array(dis_bests)
        if np.min(fdgs_list) < 1:
            fdgs_list /= np.min(dis_bests)
        fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
        if np.min(fdgs_list) < 1:
            fdgs_list = np.array(fdgs_list) + 1

        for ig, gs in enumerate(Gs_nearest + gihat_list):
#            nx.draw_networkx(gs)
#            plt.show()
            for trial in range(0, l):
#            for trial in tqdm(range(0, l), desc='l loops', file=sys.stdout):
                # add and delete edges.
                gtemp = gs.copy()
                np.random.seed()
                # which edges to change.
                # @todo: should we use just half of the adjacency matrix for undirected graphs?
                nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
                # @todo: what if fdgs is bigger than nb_vpairs?
                idx_change = random.sample(range(nb_vpairs),
                    fdgs_list[ig] if fdgs_list[ig] < nb_vpairs else nb_vpairs)
#                idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
#                    (nx.number_of_nodes(gs) - 1), fdgs)
                for item in idx_change:
                    node1 = int(item / (nx.number_of_nodes(gs) - 1))
                    node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
                    if node2 >= node1:  # skip the self pair.
                        node2 += 1
                    # @todo: is the randomness correct?
                    if not gtemp.has_edge(node1, node2):
                        gtemp.add_edge(node1, node2)
#                        nx.draw_networkx(gs)
#                        plt.show()
#                        nx.draw_networkx(gtemp)
#                        plt.show()
                    else:
                        gtemp.remove_edge(node1, node2)
#                        nx.draw_networkx(gs)
#                        plt.show()
#                        nx.draw_networkx(gtemp)
#                        plt.show()
#                nx.draw_networkx(gtemp)
#                plt.show()

                # compute distance between \psi and the new generated graph.
#                knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
#                                          p_quit=lmbda, n_iteration=20, remove_totters=False,
#                                          n_jobs=multiprocessing.cpu_count(), verbose=False)
                knew = compute_kernel([gtemp] + Gn_median, gkernel,
                                      verbose=False)
                dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
                                 withterm3=False)
                if dnew <= dhat:  # @todo: the new distance is smaller or also equal?
                    if dnew < dhat:
                        print('\nI am smaller!')
                        print('ig =', str(ig), ', l =', str(trial))
                        print(dhat, '->', dnew)
                        nb_updated += 1
                    elif dnew == dhat:
                        print('I am equal!')
#                    nx.draw_networkx(gtemp)
#                    plt.show()
#                    print(gtemp.nodes(data=True))
#                    print(gtemp.edges(data=True))
                    dhat = dnew
                    gnew = gtemp.copy()
                    found = True  # found better graph.
        if found:
            r = 0
            gihat_list = [gnew]
            dihat_list = [dhat]
        else:
            r += 1
        dis_of_each_itr.append(dhat)
        print('the shortest distances for previous iterations are',
              dis_of_each_itr)
#    dis_best.append(dhat)
    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
    print('distances in kernel space:', dis_of_each_itr, '\n')

    return dhat, g_best, nb_updated
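
# The number of random edge flips ("fdgs") above is derived from the current
# best kernel distances as roughly ceil(log(d)), floored at one flip. The
# following standalone sketch mirrors that heuristic (hypothetical helper
# name; see the @todo notes in the functions above about negative logs and
# the choice of scaling):
def _nb_edge_flips_from_distances(dis_bests):
    import numpy as np
    fdgs_list = np.array(dis_bests, dtype=float)
    if np.min(fdgs_list) < 1:
        fdgs_list /= np.min(fdgs_list)  # rescale so the log is non-negative.
    fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
    if np.min(fdgs_list) < 1:
        fdgs_list = list(np.array(fdgs_list) + 1)
    return fdgs_list
# e.g. _nb_edge_flips_from_distances([0.5, 2.0, 3.0]) gives [1, 3, 3].
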
def test_iam_fitdistance():
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
#    Gn = Gn[0:50]
#    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    node_label = 'atom'
    edge_label = 'bond_type'

#    lmbda = 0.03  # termination probability
#    # parameters for GED function
#    c_vi = 0.037
#    c_vr = 0.038
#    c_vs = 0.075
#    c_ei = 0.001
#    c_er = 0.001
#    c_es = 0.0
#    ite_max_iam = 50
#    epsilon_iam = 0.001
#    removeNodes = False
#    connected_iam = False
#    # parameters for IAM function
#    ged_cost = 'CONSTANT'
#    ged_method = 'IPFP'
#    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
#    ged_stabilizer = 'min'
#    ged_repeat = 50
#    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
#                  'edit_cost_constant': edit_cost_constant,
#                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}

    # parameters for GED function
    c_vi = 4
    c_vr = 4
    c_vs = 2
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False
    # parameters for IAM function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    edit_cost_constant = []
    ged_stabilizer = 'min'
    ged_repeat = 50
    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
                  'edit_cost_constant': edit_cost_constant,
                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # number of graphs; we want to compute the median of these graphs.
#    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    nb_median_range = [10]

#    # compute Gram matrix.
#    time0 = time.time()
#    km = compute_kernel(Gn, gkernel, True)
#    time_km = time.time() - time0
#    # write Gram matrix to file.
#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)

    time_list = []
    dis_ks_min_list = []
    dis_ks_gen_median_list = []
    sod_gs_list = []
#    sod_gs_min_list = []
#    nb_updated_list = []
#    nb_updated_k_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn_median]

#        for g in Gn_median:
#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
#            plt.show()
#            plt.clf()

        ###################################################################
#        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
#        km_tmp = gmfile['gm']
#        time_km = gmfile['gmtime']
#        # modify the mixed Gram matrix.
#        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
#        for i in range(len(Gn)):
#            for j in range(i, len(Gn)):
#                km[i, j] = km_tmp[i, j]
#                km[j, i] = km[i, j]
#        for i in range(len(Gn)):
#            for j, idx in enumerate(idx_rdm):
#                km[i, len(Gn) + j] = km[i, idx]
#                km[len(Gn) + j, i] = km[i, idx]
#        for i, idx1 in enumerate(idx_rdm):
#            for j, idx2 in enumerate(idx_rdm):
#                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################

        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
            = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er,
                           c_es=c_es, ite_max=ite_max_iam, epsilon=epsilon_iam,
                           connected=connected_iam, removeNodes=removeNodes,
                           params_ged=params_ged)

        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)

        # compute distance between \psi and the new generated graphs.
        knew = compute_kernel(G_gen_median_list + Gn_median, gkernel,
                              node_label, edge_label, False)
        dhat_new_list = []
        for idx, g_tmp in enumerate(G_gen_median_list):
            # @todo: the term3 below could use the one at the beginning of the function.
            dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
                len(G_gen_median_list) + len(Gn_median) + 1),
                alpha_range, knew, withterm3=False))

        print('\nsmallest distance in kernel space: ', dhat_new_list[0])
        dis_ks_min_list.append(dhat_new_list[0])
        g_best.append(G_gen_median_list[0])

        # show the best graph and save it to file.
#        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(G_gen_median_list[0],
                labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
                with_labels=True)
        plt.show()
#        plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
#        plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
#                    '.png', format="PNG")
        plt.clf()
#        print(ghat_list[0].nodes(data=True))
#        print(ghat_list[0].edges(data=True))

        sod_gs_list.append(sod_gen_median)
#        sod_gs_min_list.append(np.min(sod_gen_median))
        print('\nsmallest sod in graph space: ', sod_gen_median)
        print('\nsmallest sod of set median in graph space: ', sod_set_median)

    print('\nsods in graph space: ', sod_gs_list)
#    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
#    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
#          nb_updated_list)
#    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
#          nb_updated_k_list)
    print('\ntimes:', time_list)
def test_iam_letter_h():
    from median import draw_Letter_graph
    ds = {'name': 'Letter-high',
          'dataset': '../datasets/Letter-high/Letter-high_A.txt',
          'extra_params': {}}  # node nsymb
#    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
#          'extra_params': {}}  # node nsymb
#    Gn = Gn[0:50]
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    gkernel = 'structuralspkernel'

    # parameters for GED function from the IAM paper.
    c_vi = 3
    c_vr = 3
    c_vs = 1
    c_ei = 3
    c_er = 3
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False
    # parameters for IAM function
#    ged_cost = 'CONSTANT'
    ged_cost = 'LETTER'
    ged_method = 'IPFP'
#    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
    edit_cost_constant = []
    ged_stabilizer = 'min'
    ged_repeat = 50
    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
                  'edit_cost_constant': edit_cost_constant,
                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}

    # classify graphs according to letters.
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    g_best = []
    sod_set_median_list = []
    idx_dict = get_same_item_indices(y_all)
    for letter in idx_dict:
        print('\n-------------------------------------------------------')
        print('letter', letter)
        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]

        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        g_best.append([])
        sod_set_median_list.append([])

        for repeat in range(50):
            idx_rdm = random.sample(range(len(Gn_let)), 50)
            print('graphs chosen:', idx_rdm)
            Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
            Gn_candidate = [g.copy() for g in Gn_median]

            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
            time0 = time.time()
            ghat_new_list, sod_min, sod_set_median = iam_upgraded(
                Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es,
                ite_max=ite_max_iam, epsilon=epsilon_iam,
                connected=connected_iam, removeNodes=removeNodes,
                params_ged=params_ged)
            time_total = time.time() - time0
            print('\ntime: ', time_total)
            time_list[-1].append(time_total)
            g_best[-1].append(ghat_new_list[0])
            sod_set_median_list[-1].append(sod_set_median)
            print('\nsmallest sod of the set median:', sod_set_median)
            sod_gs_list[-1].append(sod_min)
            print('\nsmallest sod in graph space:', sod_min)

            # show the best graph and save it to file.
            print('one of the possible corresponding pre-images is')
            draw_Letter_graph(ghat_new_list[0],
                              savepath='results/iam/paper_compare/')

            # compute distance between \psi and the new generated graphs.
            knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
            dhat_new_list = []
            for idx, g_tmp in enumerate(ghat_new_list):
                # @todo: the term3 below could use the one at the beginning of the function.
                dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
                    len(ghat_new_list) + len(Gn_median) + 1),
                    alpha_range, knew, withterm3=False))

            print('\nsmallest distance in kernel space: ', dhat_new_list[0])
            dis_ks_min_list[-1].append(dhat_new_list[0])

        print('\nsods of the set median for this letter:',
              sod_set_median_list[-1])
        print('\nsods in graph space for this letter:', sod_gs_list[-1])
        print('\nsmallest distances in kernel space for this letter:',
              dis_ks_min_list[-1])
        print('\ntimes for this letter:', time_list[-1])

        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
        time_list[-1] = np.mean(time_list[-1])

    print('\nmean sods of the set median for each letter:',
          sod_set_median_list)
    print('\nmean sods in graph space for each letter:', sod_gs_list)
    print('\nmean smallest distances in kernel space for each letter:',
          dis_ks_min_list)
    print('\nmean times for each letter:', time_list)

    print('\nmean sods of the set median of all:',
          np.mean(sod_set_median_list))
    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
    print('\nmean smallest distances in kernel space of all:',
          np.mean(dis_ks_min_list))
    print('\nmean times of all:', np.mean(time_list))
def test_iam_mutag():
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
#    Gn = Gn[0:50]
    gkernel = 'untilhpathkernel'
    node_label = 'atom'
    edge_label = 'bond_type'

    # parameters for GED function from the IAM paper.
    # fitted edit costs.
    c_vi = 0.03523843108436513
    c_vr = 0.03347339739350128
    c_vs = 0.06871290673612238
    c_ei = 0.08591999846720685
    c_er = 0.07962086440894103
    c_es = 0.08596855855478233
    # unfitted edit costs.
#    c_vi = 3
#    c_vr = 3
#    c_vs = 1
#    c_ei = 3
#    c_er = 3
#    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False
    # parameters for IAM function
    ged_cost = 'CONSTANT'
    ged_method = 'IPFP'
    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
#    edit_cost_constant = []
    ged_stabilizer = 'min'
    ged_repeat = 50
    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
                  'edit_cost_constant': edit_cost_constant,
                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}

    # classify graphs according to classes.
    time_list = []
    dis_ks_min_list = []
    dis_ks_set_median_list = []
    sod_gs_list = []
    g_best = []
    sod_set_median_list = []
    sod_list_list = []
    idx_dict = get_same_item_indices(y_all)
    for y_class in idx_dict:
        print('\n-------------------------------------------------------')
        print('class of y:', y_class)
        Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]

        time_list.append([])
        dis_ks_min_list.append([])
        dis_ks_set_median_list.append([])
        sod_gs_list.append([])
        g_best.append([])
        sod_set_median_list.append([])

        for repeat in range(50):
            idx_rdm = random.sample(range(len(Gn_class)), 10)
            print('graphs chosen:', idx_rdm)
            Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
            Gn_candidate = [g.copy() for g in Gn_median]

            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
            time0 = time.time()
            G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
                = iam_upgraded(Gn_median, Gn_candidate, c_ei=c_ei, c_er=c_er,
                               c_es=c_es, ite_max=ite_max_iam,
                               epsilon=epsilon_iam, connected=connected_iam,
                               removeNodes=removeNodes, params_ged=params_ged)
            time_total = time.time() - time0
            print('\ntime: ', time_total)
            time_list[-1].append(time_total)
            g_best[-1].append(G_gen_median_list[0])
            sod_set_median_list[-1].append(sod_set_median)
            print('\nsmallest sod of the set median:', sod_set_median)
            sod_gs_list[-1].append(sod_gen_median)
            print('\nsmallest sod in graph space:', sod_gen_median)
            sod_list_list.append(sod_list)

            # show the best graph and save it to file.
            print('one of the possible corresponding pre-images is')
            nx.draw(G_gen_median_list[0],
                    labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
                    with_labels=True)
#            plt.show()
#            plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
#            plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) +
#                        '_repeat' + str(repeat) + '_' + str(time.time()) +
#                        '.png', format="PNG")
            plt.clf()
#            print(G_gen_median_list[0].nodes(data=True))
#            print(G_gen_median_list[0].edges(data=True))

            # compute distance between \psi and the set median graph.
            knew_set_median = compute_kernel(G_set_median_list + Gn_median,
                                             gkernel, node_label, edge_label,
                                             False)
            dhat_new_set_median_list = []
            for idx, g_tmp in enumerate(G_set_median_list):
                # @todo: the term3 below could use the one at the beginning of the function.
                dhat_new_set_median_list.append(dis_gstar(idx,
                    range(len(G_set_median_list),
                          len(G_set_median_list) + len(Gn_median) + 1),
                    alpha_range, knew_set_median, withterm3=False))

            print('\ndistance in kernel space of set median: ',
                  dhat_new_set_median_list[0])
            dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])

            # compute distance between \psi and the new generated graphs.
            knew = compute_kernel(G_gen_median_list + Gn_median, gkernel,
                                  node_label, edge_label, False)
            dhat_new_list = []
            for idx, g_tmp in enumerate(G_gen_median_list):
                # @todo: the term3 below could use the one at the beginning of the function.
                dhat_new_list.append(dis_gstar(idx,
                    range(len(G_gen_median_list),
                          len(G_gen_median_list) + len(Gn_median) + 1),
                    alpha_range, knew, withterm3=False))

            print('\nsmallest distance in kernel space: ', dhat_new_list[0])
            dis_ks_min_list[-1].append(dhat_new_list[0])

        print('\nsods of the set median for this class:',
              sod_set_median_list[-1])
        print('\nsods in graph space for this class:', sod_gs_list[-1])
        print('\ndistances in kernel space of set median for this class:',
              dis_ks_set_median_list[-1])
        print('\nsmallest distances in kernel space for this class:',
              dis_ks_min_list[-1])
        print('\ntimes for this class:', time_list[-1])

        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
        dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
        time_list[-1] = np.mean(time_list[-1])

    print()
    print('\nmean sods of the set median for each class:',
          sod_set_median_list)
    print('\nmean sods in graph space for each class:', sod_gs_list)
    print('\ndistances in kernel space of set median for each class:',
          dis_ks_set_median_list)
    print('\nmean smallest distances in kernel space for each class:',
          dis_ks_min_list)
    print('\nmean times for each class:', time_list)

    print('\nmean sods of the set median of all:',
          np.mean(sod_set_median_list))
    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
    print('\nmean distances in kernel space of set median of all:',
          np.mean(dis_ks_set_median_list))
    print('\nmean smallest distances in kernel space of all:',
          np.mean(dis_ks_min_list))
    print('\nmean times of all:', np.mean(time_list))

    nb_better_sods = 0
    nb_worse_sods = 0
    nb_same_sods = 0
    for sods in sod_list_list:
        if sods[0] > sods[-1]:
            nb_better_sods += 1
        elif sods[0] < sods[-1]:
            nb_worse_sods += 1
        else:
            nb_same_sods += 1
    print('\nIn', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
          'are getting better,', str(nb_worse_sods), 'are getting worse,',
          str(nb_same_sods), 'are not changed;',
          str(nb_better_sods / len(sod_list_list)), 'sods are improved.')