def run_NAIE(config):
    """Build the NAIE inputs for one dataset and train the model.

    The graph named by ``config['dataset']`` is loaded; for the ``'lp'`` task
    the edges listed in the link-prediction test file are removed from it
    first. The model input ``C`` is the adjacency matrix concatenated with
    the ``lambda``-scaled features, and the training target is the same
    concatenation built from the smoothed adjacency/features.

    Args:
        config: Mutable settings mapping. Keys read here: ``'dataset'``,
            ``'task'``, ``'lp_test_path'`` (only for the ``'lp'`` task),
            ``'lambda'``, ``'strategy'`` (``'nc'`` or ``'sw'``) and
            ``'struct'``. Keys written here: ``'link_test_pairs'`` (``'lp'``
            task only) and ``config['struct'][0]``, which is set to the
            number of columns of ``C``.

    Raises:
        ValueError: If ``config['strategy']`` is neither ``'nc'`` nor ``'sw'``.
    """
    strategy = config['strategy']
    # Fail early: without this check an unknown strategy would only surface
    # as an unbound-variable error after all the expensive loading/smoothing.
    if strategy not in ('nc', 'sw'):
        raise ValueError(
            "Unknown strategy {!r}; expected 'nc' or 'sw'.".format(strategy))

    graph = load_graph(config['dataset'], labels_is_onehot=False)

    if config['task'] == 'lp':
        test_edge_file = (config['lp_test_path'] + config['dataset']
                          + "_lp_test.edgelist")
        graph.G = remove_edges(graph.G, test_edge_file)
        print("Left edges in G: {}".format(graph.G.number_of_edges()))
        test_pairs, test_labels = read_test_links(test_edge_file)
        config['link_test_pairs'] = [
            (pair[0], pair[1], label)
            for pair, label in zip(test_pairs, test_labels)
        ]

    y = graph.labels
    X = graph.features
    A = graph.adjcency_matrix(is_sparse=False)
    C = np.concatenate([A, config['lambda'] * X], axis=1)

    if strategy == 'nc':
        # The fully smoothed matrices (third argument 1.0) are only needed
        # for this strategy, so they are computed only here.
        # NOTE(review): assumes smooth() has no side effects -- confirm.
        gamma_adj = 1 - get_balance_coefficient(graph.G, smooth(A, A, 1.0))
        gamma_attr = 1 - get_balance_coefficient(graph.G, smooth(A, X, 1.0))
    else:  # strategy == 'sw'
        # Use |omega|, capped at 1.0, for both smoothing coefficients.
        omega = min(abs(get_omega(graph.G)), 1.0)
        gamma_adj = omega
        gamma_attr = omega
    print("gamma_adj={:.4f}, gamma_attr={:.4f}".format(gamma_adj, gamma_attr))

    ada_smooth_A = smooth(A, A, gamma_adj)
    ada_smooth_X = smooth(A, X, gamma_attr)
    target = np.concatenate(
        [ada_smooth_A, config['lambda'] * ada_smooth_X], axis=1)

    # The first layer width must match the width of the concatenated input.
    config['struct'][0] = C.shape[1]
    data = {'C': C, 'target': target, 'adj': A, 'y': y}
    model = NAIE(config)
    model.train(data)
def test_anycosts():
    """Fit GED edit costs to kernel distances on MUTAG and plot the result.

    Calls ``fit_GED_to_kernel_distance`` with the ``'marginalizedkernel'``
    kernel and ``itr_max = 10``, prints the returned quantities, stores them
    in ``results/fit_distance.any_costs.gm`` via ``np.savez`` and writes
    three EPS heat maps: the normalized kernel-distance matrix, the
    normalized GED matrix and their difference.
    """
    dataset = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }  # node/edge symb
    Gn, y_all = loadDataset(dataset['dataset'],
                            extra_params=dataset['extra_params'])
    remove_edges(Gn)

    fit_result = fit_GED_to_kernel_distance(Gn, 'marginalizedkernel', 10)
    (edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
     time_list, nb_cost_mat_list, coef_dk) = fit_result
    total_time = np.sum(time_list)

    report = (
        ('\nedit_costs:', edit_costs),
        ('\nresidual_list:', residual_list),
        ('\nedit_cost_list:', edit_cost_list),
        ('\ndistance matrix in kernel space:', dis_k_mat),
        ('\nged matrix:', ged_mat),
        ('\ntotal time:', total_time),
        ('\nnb_cost_mat:', nb_cost_mat_list[-1]),
    )
    for heading, value in report:
        print(heading, value)

    # The saved archive can be re-read with
    # np.load('results/fit_distance.any_costs.gm.npz') to redo the plots
    # without refitting.
    np.savez('results/fit_distance.any_costs.gm',
             edit_costs=edit_costs,
             residual_list=residual_list,
             edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat,
             ged_mat=ged_mat,
             time_list=time_list,
             total_time=total_time,
             nb_cost_mat_list=nb_cost_mat_list)

    def _save_heatmap(matrix, stem):
        # Render one matrix with a colour bar, save it as EPS, reset figure.
        plt.imshow(matrix)
        plt.colorbar()
        plt.savefig(stem + '.eps', format='eps', dpi=300)
        plt.clf()

    # normalized distance matrices.
    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
    _save_heatmap(norm_dis_k_mat, 'results/norm_dis_k_mat.any_costs')

    norm_ged_mat = normalize_distance_matrix(ged_mat)
    _save_heatmap(norm_ged_mat, 'results/norm_ged_mat.any_costs')

    _save_heatmap(norm_ged_mat - norm_dis_k_mat,
                  'results/diff_mat_norm_ged_dis_k.any_costs')
def test_iam_median_nb():
    """Run ``iam_upgraded`` on the class-1 MUTAG graphs and report the result.

    For every entry of ``nb_median_range`` (currently only ``len(Gn)``, i.e.
    all graphs with label 1) a seeded random sample of graphs is used as the
    median set and the whole class as the candidate set. The function prints
    the run time, the kernel-space distance of the first returned graph and
    the value ``sod_min`` returned by ``iam_upgraded``, and saves a drawing
    of the first returned graph under ``results/iam/``.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'

    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False

    # parameters for GED function. With the 'CHEM_1' cost no explicit
    # edit-cost constants are passed (empty list).
    params_ged = {
        'lib': 'gedlibpy',
        'cost': 'CHEM_1',
        'method': 'IPFP',
        'edit_cost_constant': [],
        'stabilizer': 'min',
        'repeat': 50,
    }

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [len(Gn)]

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)  # reseed so each sample is reproducible
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn]

        # Every median graph gets the same weight.
        alpha_range = [1 / nb_median] * nb_median

        time0 = time.time()
        ghat_new_list, sod_min = iam_upgraded(
            Gn_median, Gn_candidate,
            c_ei=c_ei, c_er=c_er, c_es=c_es,
            ite_max=ite_max_iam, epsilon=epsilon_iam,
            connected=connected_iam, removeNodes=removeNodes,
            params_ged=params_ged)
        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)

        # compute distance between \psi and the new generated graphs.
        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
        # Rows/columns of ``knew`` holding the median graphs: they follow the
        # generated graphs, so there are exactly len(Gn_median) of them (the
        # former ``+ 1`` pointed one index past the end of the matrix).
        nb_new = len(ghat_new_list)
        idx_median = range(nb_new, nb_new + len(Gn_median))
        # @todo: the term3 below could use the one at the beginning of the
        # function.
        dhat_new_list = [
            dis_gstar(idx, idx_median, alpha_range, knew, withterm3=False)
            for idx in range(nb_new)
        ]

        print('\nsmallest distance in kernel space: ', dhat_new_list[0])
        dis_ks_min_list.append(dhat_new_list[0])
        g_best.append(ghat_new_list[0])

        # show the best graph and save it to file.
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_new_list[0],
                labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
                with_labels=True)
        # Save before showing: with an interactive backend the figure can be
        # gone once show() returns, which would write an empty image.
        plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.show()
        plt.clf()

        sod_gs_list.append(sod_min)
        print('\nsmallest sod in graph space: ', sod_min)

    print('\nsods in graph space: ', sod_gs_list)
    print(
        '\nsmallest distance in kernel space for each set of median graphs: ',
        dis_ks_min_list)
    print('\ntimes:', time_list)
def perm_mi(args):
    '''
    Remove edges from a block graph, permute its nodes, align it back to
    the original, then measure the normalized MI of the recovered classes.

    Two alignments are timed and scored: ``graph.graph_dist`` and
    ``st.find_permutation``. One line with ``n_remove``, both MI values and
    both durations is printed.

    Reads from ``args``: ``fix_seed``, ``n_remove``, ``st_it``, ``st_tau``,
    ``st_n_samples``, ``st_epochs``, ``st_lr``.
    Writes to ``args``: ``n_epochs``, ``Lx``, ``m``, ``n``.

    Returns the MI obtained from the ``graph.graph_dist`` alignment.
    '''
    args.n_epochs = 1000
    n_blocks, block_size = 4, 10

    # Flip to True to reuse a previously saved graph instead of sampling one.
    use_given_graph = False
    if not use_given_graph:
        graph_seed = 0 if args.fix_seed else None
        g = utils.create_graph(40, gtype='block',
                               params={'n_blocks': 4}, seed=graph_seed)
    else:
        g = torch.load('mi_g_.pt')

    # Class labels 0..3, ten consecutive nodes per class.
    orig_cls = np.repeat(np.arange(n_blocks), block_size)

    Lg = utils.graph_to_lap(g)
    args.Lx = Lg.clone()
    args.m = len(Lg)

    # remove edges and permute
    n_remove = args.n_remove
    rand_seed = 0 if args.fix_seed else None
    Lg_removed = utils.remove_edges(Lg, n_remove=n_remove, seed=rand_seed)
    Lg_perm, perm = utils.permute_nodes(Lg_removed.numpy(), seed=rand_seed)
    # Inverse permutation (currently not used further down).
    inv_perm = np.empty(args.m, perm.dtype)
    inv_perm[perm] = np.arange(args.m)

    Ly = torch.from_numpy(Lg_perm)
    args.n = len(Ly)

    # 8 st_n_samples worked best, 5 sinkhorn iter, 1 as tau
    # align
    started = time.time()
    loss, P, Ly_ = graph.graph_dist(args, plot=False, Ly=Ly,
                                    take_ly_exp=False)
    dur_ot = time.time() - started

    matched = P.argmax(-1).cpu().numpy()
    # Optional thresholding of P; disabled by default.
    perm_mx = False
    if perm_mx:
        row_max = P.max(-1, keepdim=True)[0]
        P[P < row_max - .1] = 0
        P[P > 0] = 1
    permuted_cls = orig_cls[perm]
    new_cls = permuted_cls[matched].reshape(-1)
    mi = utils.normalizedMI(orig_cls, new_cls)

    Lx = args.Lx
    started = time.time()
    x_reg, y_reg, (P_st, loss_st) = st.find_permutation(
        Ly.cpu().numpy(), Lx.cpu().numpy(),
        args.st_it, args.st_tau, args.st_n_samples, args.st_epochs,
        args.st_lr, loss_type='w', alpha=0, ones=True, graphs=True)
    dur_st = time.time() - started

    matched_st = P_st.argmax(-1)
    new_cls_st = orig_cls[perm][matched_st].reshape(-1)
    mi_st = utils.normalizedMI(orig_cls, new_cls_st)

    print('{} {} {} {} {}'.format(n_remove, mi, mi_st, dur_ot, dur_st))
    return mi
def test_random_preimage_2combination():
    """Search pre-images of weighted combinations of two MUTAG graphs.

    Graphs 187 and 167 are combined with weights ``[alpha, 1 - alpha]`` for
    eleven values of ``alpha`` in ``[0, 1]``. For each ``alpha``,
    ``preimage_random`` is run against a precomputed Gram matrix read from
    ``results/gram_matrix_marg_itr10_pq0.03.gm.npz``; the resulting graph is
    drawn, saved under ``results/random_preimage/`` and printed.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    nb_updates_max = 500  # passed to preimage_random as ``l``
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors

    # The two molecules are fixed; the seed is still set because code called
    # below may draw from NumPy's global generator.
    np.random.seed(1)
    idx_gi = [187, 167]
    g1 = Gn[idx_gi[0]].copy()
    g2 = Gn[idx_gi[1]].copy()

    # Load the Gram matrix; the context manager closes the archive's file
    # handle once both arrays have been read into memory.
    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']

    # modify mixed gram matrix: rows/columns n and n + 1 become copies of
    # the rows/columns of the two selected graphs.
    n = len(Gn)
    cross = km[:n, idx_gi]  # fancy indexing -> independent copy
    km[:n, n:n + 2] = cross
    km[n:n + 2, :n] = cross.T
    km[n:n + 2, n:n + 2] = km[np.ix_(idx_gi, idx_gi)]

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(
            Gn, [g1, g2], [alpha, 1 - alpha], range(n, n + 2),
            km, k, r_max, nb_updates_max, gkernel)
        # The stored Gram-matrix computation time is counted as well.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for idx, item in enumerate(alpha_range):
        best = g_best[idx]
        print('when alpha is', item, 'the shortest distance is',
              dis_ks_min_list[idx])
        print('one of the possible corresponding pre-images is')
        nx.draw(best, labels=nx.get_node_attributes(best, 'atom'),
                with_labels=True)
        # Save before showing: with an interactive backend the figure can be
        # gone once show() returns, which would write an empty image.
        plt.savefig('results/random_preimage/mutag_alpha' + str(item) +
                    '.png', format="PNG")
        plt.show()
        plt.clf()
        print(best.nodes(data=True))
        print(best.edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ',
          dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
def test_preimage_random_grid_k_median_nb():
    """Grid-run ``preimage_random`` over median-set sizes and ``k`` values.

    Works on the MUTAG graphs with label 1. For every size in
    ``nb_median_range`` a seeded random sample is used as the median set, and
    for every ``k`` in ``k_range`` ``preimage_random`` is run with equal
    weights. Run time, kernel-space distance, number of updates and the
    ``ged_median`` result are collected per (size, k) pair and printed at the
    end; each resulting graph is drawn to ``results/preimage_random/``.

    The Gram matrix of the class-1 graphs is read from
    ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``).
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    nb_updates_max = 500  # update limit for random generation

    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    n = len(Gn)

    # The archive does not depend on the loop variables, so it is read once;
    # the context manager closes its file handle afterwards.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    # Symmetric n x n block built from the upper triangle of the stored
    # matrix (the lower triangle mirrors it).
    upper = np.triu(km_tmp[:n, :n])
    km_base = upper + np.triu(upper, 1).T

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)  # reseed so each sample is reproducible
        idx_rdm = random.sample(range(n), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # modify mixed gram matrix: append one row/column per median graph,
        # copied from the row/column of the graph it was sampled from.
        km = np.zeros((n + nb_median, n + nb_median))
        km[:n, :n] = km_base
        cross = km_base[:, idx_rdm]
        km[:n, n:] = cross
        km[n:, :n] = cross.T
        km[n:, n:] = km_base[np.ix_(idx_rdm, idx_rdm)]

        # Every median graph gets the same weight.
        alpha_range = [1 / nb_median] * nb_median

        # One result row per median-set size, one entry per k.
        times_row, dis_row, sod_row = [], [], []
        sod_min_row, updates_row, best_row = [], [], []
        time_list.append(times_row)
        dis_ks_min_list.append(dis_row)
        sod_gs_list.append(sod_row)
        sod_gs_min_list.append(sod_min_row)
        nb_updated_list.append(updates_row)
        g_best.append(best_row)

        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat, nb_updated = preimage_random(
                Gn, Gn_median, alpha_range, range(n, n + nb_median),
                km, k, r_max, nb_updates_max, gkernel)
            # The stored Gram-matrix computation time is counted as well.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            times_row.append(time_total)
            print('\nsmallest distance in kernel space: ', dhat)
            dis_row.append(dhat)
            best_row.append(ghat)
            print('\nnumber of updates of the best graph: ', nb_updated)
            updates_row.append(nb_updated)

            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                    with_labels=True)
            plt.savefig('results/preimage_random/mutag_median_nb' +
                        str(nb_median) + '_k' + str(k) + '.png',
                        format="PNG")
            plt.clf()

            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                    ged_method=ged_method, saveGXL=saveGXL)
            sod_row.append(sod_tmp)
            sod_min_row.append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
          nb_updated_list)
    print('\ntimes:', time_list)
def test_gkiam_2combination():
    """Run ``gk_iam_nearest_multi`` on a combination of two MUTAG graphs.

    Graphs 10 and 11 are combined with weights ``[alpha, 1 - alpha]`` for
    every ``alpha`` in ``alpha_range`` (currently only 0.5). The Gram matrix
    is read from ``results/gram_matrix.gm.npz`` (arrays ``gm`` and
    ``gmtime``). The first returned graph per ``alpha`` is drawn, saved under
    ``results/gk_iam/`` and printed, and its ``ged_median`` result with
    respect to the two input graphs is reported.
    """
    from gk_iam import gk_iam_nearest_multi

    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 20  # k nearest neighbors
    epsilon = 1e-6
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    c_ei = 1
    c_er = 1
    c_es = 1

    # The two molecules are fixed; the seed is still set because code called
    # below may draw from NumPy's global generator.
    np.random.seed(1)
    idx_gi = [10, 11]
    g1 = Gn[idx_gi[0]].copy()
    g2 = Gn[idx_gi[1]].copy()

    # Read the Gram matrix. Per the previously disabled code, the file was
    # written by computing the kernel on Gn + [g1, g2] and calling
    # np.savez('results/gram_matrix.gm', gm=km, gmtime=time_km).
    # The context manager closes the archive's file handle after reading.
    with np.load('results/gram_matrix.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']

    n = len(Gn)
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat_list, _sod_ks, nb_updated = gk_iam_nearest_multi(
            Gn, [g1, g2], [alpha, 1 - alpha], range(n, n + 2),
            km, k, r_max, gkernel,
            c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
            ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
        # The stored Gram-matrix computation time is counted as well.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for idx, item in enumerate(alpha_range):
        best = g_best[idx][0]
        print('when alpha is', item, 'the shortest distance is',
              dis_ks_min_list[idx])
        print('one of the possible corresponding pre-images is')
        nx.draw(best, labels=nx.get_node_attributes(best, 'atom'),
                with_labels=True)
        plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png',
                    format="PNG")
        plt.show()
        # Reset the figure so a later alpha is not drawn on top of this one.
        plt.clf()
        print(best.nodes(data=True))
        print(best.edges(data=True))

    # compute the corresponding sod in graph space.
    for idx, item in enumerate(alpha_range):
        # g_best[idx] is the list of graphs returned for this alpha; pass its
        # first graph (the one drawn above) as a one-element list. The former
        # ``[g_best[0]]`` wrapped the whole list and ignored ``idx``.
        sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2],
                                ged_cost=ged_cost, ged_method=ged_method,
                                saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each alpha: ',
          dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
def test_gkiam_2combination_all_pairs():
    """Run ``preimage_iam`` on pairs of MUTAG graphs and log update counts.

    The pair loops currently cover only the pair (187, 167). For each pair
    the two graphs are drawn and saved, the precomputed Gram matrix from
    ``results/gram_matrix_marg_itr10_pq0.03.gm.npz`` is extended with the
    pair's rows/columns, and ``preimage_iam`` is run for every ``alpha`` in
    ``alpha_range`` (currently only 0.5). Results are printed, the first
    returned graph is saved under ``results/gk_iam/`` and a line with the
    number of updates is prepended to
    ``results/gk_iam/all_pairs/nb_updates.txt``.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = False
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False

    n = len(Gn)
    nb_update_mat = np.full((n, n), np.inf)
    log_path = 'results/gk_iam/all_pairs/nb_updates.txt'

    # test on each pair of graphs. To cover all pairs use
    # range(n - 1, -1, -1) for idx1 and range(idx1, -1, -1) for idx2.
    for idx1 in range(187, 188):
        for idx2 in range(167, 168):
            g1 = Gn[idx1].copy()
            g2 = Gn[idx2].copy()

            # Draw both input graphs; the file name follows the graph index
            # so it stays correct if the ranges above are widened.
            for g_in, idx_in in ((g1, idx1), (g2, idx2)):
                nx.draw(g_in, labels=nx.get_node_attributes(g_in, 'atom'),
                        with_labels=True)
                plt.savefig("results/gk_iam/all_pairs/mutag" + str(idx_in) +
                            ".png", format="PNG")
                plt.show()
                plt.clf()

            # Reload the Gram matrix for every pair because it is modified
            # in place below; the context manager closes the file handle.
            with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
                km = gmfile['gm']
                time_km = gmfile['gmtime']

            # modify mixed gram matrix: rows/columns n and n + 1 become
            # copies of the rows/columns of the two selected graphs.
            pair = [idx1, idx2]
            cross = km[:n, pair]  # fancy indexing -> independent copy
            km[:n, n:n + 2] = cross
            km[n:n + 2, :n] = cross.T
            km[n:n + 2, n:n + 2] = km[np.ix_(pair, pair)]

            time_list = []
            dis_ks_min_list = []
            sod_gs_list = []
            sod_gs_min_list = []
            nb_updated_list = []
            nb_updated_k_list = []
            g_best = []
            # for each alpha
            for alpha in alpha_range:
                print(
                    '\n-------------------------------------------------------\n'
                )
                print('alpha =', alpha)
                time0 = time.time()
                dhat, ghat_list, _sod_ks, nb_updated, nb_updated_k = \
                    preimage_iam(Gn, [g1, g2], [alpha, 1 - alpha],
                                 range(n, n + 2), km, k, r_max, gkernel,
                                 epsilon=epsilon,
                                 InitIAMWithAllDk=InitIAMWithAllDk,
                                 params_iam={'c_ei': c_ei, 'c_er': c_er,
                                             'c_es': c_es,
                                             'ite_max': ite_max_iam,
                                             'epsilon': epsilon_iam,
                                             'removeNodes': removeNodes,
                                             'connected': connected_iam},
                                 params_ged={'ged_cost': ged_cost,
                                             'ged_method': ged_method,
                                             'saveGXL': saveGXL})
                # The stored Gram-matrix computation time is counted too.
                time_total = time.time() - time0 + time_km
                print('time: ', time_total)
                time_list.append(time_total)
                dis_ks_min_list.append(dhat)
                g_best.append(ghat_list)
                nb_updated_list.append(nb_updated)
                nb_updated_k_list.append(nb_updated_k)

            # show best graphs and save them to file.
            for idx, item in enumerate(alpha_range):
                best = g_best[idx][0]
                print('when alpha is', item, 'the shortest distance is',
                      dis_ks_min_list[idx])
                print('one of the possible corresponding pre-images is')
                nx.draw(best, labels=nx.get_node_attributes(best, 'atom'),
                        with_labels=True)
                plt.savefig('results/gk_iam/mutag' + str(idx1) + '_' +
                            str(idx2) + '_alpha' + str(item) + '.png',
                            format="PNG")
                plt.clf()

            # compute the corresponding sod in graph space.
            for idx, item in enumerate(alpha_range):
                # g_best[idx] is the list of graphs returned for this alpha;
                # pass its first graph (the one drawn above) as a one-element
                # list. The former ``[g_best[0]]`` wrapped the whole list and
                # ignored ``idx``.
                sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2],
                                        ged_cost=ged_cost,
                                        ged_method=ged_method,
                                        saveGXL=saveGXL)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))

            print('\nsods in graph space: ', sod_gs_list)
            print('\nsmallest sod in graph space for each alpha: ',
                  sod_gs_min_list)
            print('\nsmallest distance in kernel space for each alpha: ',
                  dis_ks_min_list)
            print('\nnumber of updates of the best graph for each alpha: ',
                  nb_updated_list)
            print(
                '\nnumber of updates of the k nearest graphs for each alpha: ',
                nb_updated_k_list)
            print('\ntimes:', time_list)
            nb_update_mat[idx1, idx2] = nb_updated_list[0]

            # Prepend the newest result to the log. A missing log file is
            # treated as empty instead of aborting the whole run.
            str_fw = 'graphs %d and %d: %d.\n' % (idx1, idx2,
                                                  nb_updated_list[0])
            try:
                with open(log_path, 'r') as log_file:
                    content = log_file.read()
            except FileNotFoundError:
                content = ''
            with open(log_path, 'w') as log_file:
                log_file.write(str_fw + content)
def test_preimage_iam_median_nb():
    """Run ``preimage_iam`` on seeded random median sets of MUTAG graphs.

    Works on the MUTAG graphs with label 1. For every size in
    ``nb_median_range`` (currently only 2) a seeded random sample is used as
    the median set with equal weights. Run time, kernel-space distance, the
    update counts and the ``ged_median`` result of the first returned graph
    are printed, and that graph is displayed.

    The Gram matrix of the class-1 graphs is read from
    ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``
    (arrays ``gm`` and ``gmtime``).
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 3  # iteration limit for pre-image.
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True

    # parameters for IAM function
    c_vi = 4
    c_vr = 4
    c_vs = 2
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False
    params_iam = {
        'c_ei': c_ei,
        'c_er': c_er,
        'c_es': c_es,
        'ite_max': ite_max_iam,
        'epsilon': epsilon_iam,
        'removeNodes': removeNodes,
        'connected': connected_iam,
    }

    # parameters for GED function
    params_ged = {
        'lib': 'gedlibpy',
        'cost': 'CONSTANT',
        'method': 'IPFP',
        'edit_cost_constant': [c_vi, c_vr, c_vs, c_ei, c_er, c_es],
        'stabilizer': 'min',
        'repeat': 50,
    }

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    n = len(Gn)

    # The archive does not depend on the loop variable, so it is read once;
    # the context manager closes its file handle afterwards.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    # Symmetric n x n block built from the upper triangle of the stored
    # matrix (the lower triangle mirrors it).
    upper = np.triu(km_tmp[:n, :n])
    km_base = upper + np.triu(upper, 1).T

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    nb_updated_k_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)  # reseed so each sample is reproducible
        idx_rdm = random.sample(range(n), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # modify mixed gram matrix: append one row/column per median graph,
        # copied from the row/column of the graph it was sampled from.
        km = np.zeros((n + nb_median, n + nb_median))
        km[:n, :n] = km_base
        cross = km_base[:, idx_rdm]
        km[:n, n:] = cross
        km[n:, :n] = cross.T
        km[n:, n:] = km_base[np.ix_(idx_rdm, idx_rdm)]

        # Every median graph gets the same weight.
        alpha_range = [1 / nb_median] * nb_median

        time0 = time.time()
        dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
            preimage_iam(Gn, Gn_median, alpha_range,
                         range(n, n + nb_median), km, k, r_max, gkernel,
                         epsilon=epsilon,
                         InitIAMWithAllDk=InitIAMWithAllDk,
                         params_iam=dict(params_iam),
                         params_ged=params_ged)
        # The stored Gram-matrix computation time is counted as well.
        time_total = time.time() - time0 + time_km
        print('\ntime: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)
        print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
        nb_updated_k_list.append(nb_updated_k)

        # show the best graph.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0],
                labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                with_labels=True)
        plt.show()
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median,
                                params_ged=params_ged)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ',
          sod_gs_min_list)
    print(
        '\nsmallest distance in kernel space for each set of median graphs: ',
        dis_ks_min_list)
    print(
        '\nnumber of updates of the best graph for each set of median graphs by IAM: ',
        nb_updated_list)
    print(
        '\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
        nb_updated_k_list)
    print('\ntimes:', time_list)