Beispiel #1
0
def run_NAIE(config):
    """Prepare the NAIE inputs described by ``config`` and train the model.

    Keys read from ``config``: ``dataset``, ``task``, ``lp_test_path`` (only
    when ``task == 'lp'``), ``lambda``, ``strategy`` and ``struct``.

    Side effects on ``config``:
        * ``config['link_test_pairs']`` is set to a list of
          ``(node_a, node_b, label)`` tuples when ``task == 'lp'``.
        * ``config['struct'][0]`` is overwritten with the number of columns of
          the concatenated ``[A, lambda * X]`` input matrix.

    Raises:
        ValueError: if ``config['strategy']`` is neither ``'nc'`` nor ``'sw'``.
    """
    graph = load_graph(config['dataset'], labels_is_onehot=False)

    if config['task'] == 'lp':
        # Build the test edge-list path once; it is needed both for removing
        # the held-out edges and for reading the test links.
        test_edge_file = (config['lp_test_path'] + config['dataset'] +
                          "_lp_test.edgelist")
        graph.G = remove_edges(graph.G, test_edge_file)
        print("Left edges in G: {}".format(graph.G.number_of_edges()))
        test_pairs, test_labels = read_test_links(test_edge_file)
        config['link_test_pairs'] = [
            (edges[0], edges[1], label)
            for edges, label in zip(test_pairs, test_labels)
        ]

    y = graph.labels
    X = graph.features
    A = graph.adjcency_matrix(is_sparse=False)
    C = np.concatenate([A, config['lambda'] * X], axis=1)

    strategy = config['strategy']
    if strategy == 'nc':
        # The fully smoothed matrices are only needed by this strategy, so
        # they are computed here rather than unconditionally.
        gamma_adj = 1 - get_balance_coefficient(graph.G, smooth(A, A, 1.0))
        gamma_attr = 1 - get_balance_coefficient(graph.G, smooth(A, X, 1.0))
    elif strategy == 'sw':
        # Use |omega| clipped to at most 1 for both smoothing coefficients.
        omega = min(abs(get_omega(graph.G)), 1.0)
        gamma_adj = omega
        gamma_attr = omega
    else:
        # Previously this fell through to an UnboundLocalError on gamma_adj.
        raise ValueError(
            "Unknown strategy {!r}; expected 'nc' or 'sw'".format(strategy))
    print("gamma_adj={:.4f}, gamma_attr={:.4f}".format(gamma_adj, gamma_attr))

    ada_smooth_A = smooth(A, A, gamma_adj)
    ada_smooth_X = smooth(A, X, gamma_attr)
    target = np.concatenate([ada_smooth_A, config['lambda'] * ada_smooth_X],
                            axis=1)

    # The first layer size must match the width of the concatenated input.
    config['struct'][0] = C.shape[1]
    data = {'C': C, 'target': target, 'adj': A, 'y': y}
    model = NAIE(config)
    model.train(data)
Beispiel #2
0
def test_anycosts():
    """Fit edit costs to kernel distances on MUTAG and dump the results.

    Loads the MUTAG graphs, calls ``fit_GED_to_kernel_distance`` with the
    marginalized kernel for at most 10 iterations, prints and saves the
    returned quantities to ``results/fit_distance.any_costs.gm`` and writes
    three EPS heat maps: the normalized kernel-distance matrix, the normalized
    GED matrix and their difference.
    """
    dataset = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {}
    }  # node/edge symb
    graphs, _labels = loadDataset(dataset['dataset'],
                                  extra_params=dataset['extra_params'])
    remove_edges(graphs)

    def _save_heatmap(matrix, stem):
        # Render one matrix with a colour bar, save it as EPS, reset the figure.
        plt.imshow(matrix)
        plt.colorbar()
        plt.savefig(stem + '.eps', format='eps', dpi=300)
        plt.clf()

    kernel_name = 'marginalizedkernel'
    max_iterations = 10
    fit_result = fit_GED_to_kernel_distance(graphs, kernel_name,
                                            max_iterations)
    (edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
     time_list, nb_cost_mat_list, coef_dk) = fit_result
    total_time = np.sum(time_list)

    for caption, value in (
            ('\nedit_costs:', edit_costs),
            ('\nresidual_list:', residual_list),
            ('\nedit_cost_list:', edit_cost_list),
            ('\ndistance matrix in kernel space:', dis_k_mat),
            ('\nged matrix:', ged_mat),
            ('\ntotal time:', total_time),
    ):
        print(caption, value)
    print('\nnb_cost_mat:', nb_cost_mat_list[-1])

    np.savez('results/fit_distance.any_costs.gm',
             edit_costs=edit_costs,
             residual_list=residual_list,
             edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat,
             ged_mat=ged_mat,
             time_list=time_list,
             total_time=total_time,
             nb_cost_mat_list=nb_cost_mat_list)

    # Normalized distance matrices and their difference, one figure each.
    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
    _save_heatmap(norm_dis_k_mat, 'results/norm_dis_k_mat.any_costs')

    norm_ged_mat = normalize_distance_matrix(ged_mat)
    _save_heatmap(norm_ged_mat, 'results/norm_ged_mat.any_costs')

    _save_heatmap(norm_ged_mat - norm_dis_k_mat,
                  'results/diff_mat_norm_ged_dis_k.any_costs')
Beispiel #3
0
def test_iam_median_nb():
    """Compute a median graph of the positive MUTAG class with ``iam_upgraded``.

    For every size in ``nb_median_range`` (currently only the full positive
    class) a median set is sampled with a fixed seed, ``iam_upgraded`` is run
    on it, the kernel-space distance between each generated graph and the
    weighted mean of the median set is evaluated with ``dis_gstar``, and the
    first generated graph is drawn and saved under ``results/iam/``.
    Timings, distances and SODs are printed at the end.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {}
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False
    # parameters for GED function
    params_ged = {
        'lib': 'gedlibpy',
        'cost': 'CHEM_1',
        'method': 'IPFP',
        'edit_cost_constant': [],
        'stabilizer': 'min',
        'repeat': 50
    }

    # keep only the graphs labelled 1 (the positive class).
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # number of graphs whose median is computed.
    nb_median_range = [len(Gn)]

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn]

        # uniform weights over the median set.
        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        ghat_new_list, sod_min = iam_upgraded(Gn_median,
                                              Gn_candidate,
                                              c_ei=c_ei,
                                              c_er=c_er,
                                              c_es=c_es,
                                              ite_max=ite_max_iam,
                                              epsilon=epsilon_iam,
                                              connected=connected_iam,
                                              removeNodes=removeNodes,
                                              params_ged=params_ged)

        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)

        # compute distance between \psi and the new generated graphs.
        # The kernel matrix is laid out as [generated graphs, median graphs],
        # so the median graphs occupy exactly len(Gn_median) trailing indices
        # (the former "+ 1" pointed one past the end of the matrix).
        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
        nb_new = len(ghat_new_list)
        idx_median = range(nb_new, nb_new + len(Gn_median))
        # @todo: the term3 below could use the one at the beginning of the function.
        dhat_new_list = [
            dis_gstar(idx, idx_median, alpha_range, knew, withterm3=False)
            for idx in range(nb_new)
        ]

        print('\nsmallest distance in kernel space: ', dhat_new_list[0])
        dis_ks_min_list.append(dhat_new_list[0])
        g_best.append(ghat_new_list[0])

        # show the best graph and save it to file.
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_new_list[0],
                labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
                with_labels=True)
        # Save before showing: once show() returns the figure may already be
        # closed, in which case savefig() would write an empty image.
        plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
                    '.png',
                    format="PNG")
        plt.show()
        plt.clf()

        sod_gs_list.append(sod_min)
        print('\nsmallest sod in graph space: ', sod_min)

    print('\nsods in graph space: ', sod_gs_list)
    print(
        '\nsmallest distance in kernel space for each set of median graphs: ',
        dis_ks_min_list)
    print('\ntimes:', time_list)
Beispiel #4
0
def perm_mi(args):
    '''
    Remove edges from a 4-block graph, permute its nodes, align the result
    with the original Laplacian, and measure the normalized MI between the
    original block labels and the labels recovered through the alignment.

    Two alignments are timed and scored: ``graph.graph_dist`` and
    ``st.find_permutation``. One line with ``n_remove``, both MI scores and
    both durations is printed; only the ``graph.graph_dist`` MI is returned.

    Mutates ``args``: sets ``n_epochs``, ``Lx``, ``m`` and ``n``.
    '''
    args.n_epochs = 1000
    n_blocks, block_size = 4, 10

    load_saved_graph = False
    if load_saved_graph:
        g = torch.load('mi_g_.pt')
    else:
        g = utils.create_graph(40,
                               gtype='block',
                               params={'n_blocks': n_blocks},
                               seed=0 if args.fix_seed else None)

    # Ground-truth block label of every node: ten consecutive nodes per block.
    orig_cls = np.array(
        [block for block in range(n_blocks) for _ in range(block_size)])

    laplacian = utils.graph_to_lap(g)
    args.Lx = laplacian.clone()
    args.m = len(laplacian)

    # Remove edges, then permute the nodes of the reduced graph.
    n_remove = args.n_remove
    rand_seed = 0 if args.fix_seed else None
    reduced = utils.remove_edges(laplacian, n_remove=n_remove, seed=rand_seed)
    Lg_perm, perm = utils.permute_nodes(reduced.numpy(), seed=rand_seed)

    inv_perm = np.empty(args.m, perm.dtype)
    inv_perm[perm] = np.arange(args.m)

    Ly = torch.from_numpy(Lg_perm)
    args.n = len(Ly)

    # First alignment: graph.graph_dist.
    started = time.time()
    loss, P, Ly_ = graph.graph_dist(args, plot=False, Ly=Ly, take_ly_exp=False)
    dur_ot = time.time() - started

    matched = P.argmax(-1).cpu().numpy()
    binarize_plan = False
    if binarize_plan:
        row_max = P.max(-1, keepdim=True)[0]
        P[P < row_max - .1] = 0
        P[P > 0] = 1

    permuted_cls = orig_cls[perm]
    mi = utils.normalizedMI(orig_cls, permuted_cls[matched].reshape(-1))

    # Second alignment: st.find_permutation on the same pair of Laplacians.
    Lx = args.Lx
    started = time.time()
    x_reg, y_reg, (P_st, loss_st) = st.find_permutation(
        Ly.cpu().numpy(),
        Lx.cpu().numpy(),
        args.st_it,
        args.st_tau,
        args.st_n_samples,
        args.st_epochs,
        args.st_lr,
        loss_type='w',
        alpha=0,
        ones=True,
        graphs=True)
    dur_st = time.time() - started

    matched_st = P_st.argmax(-1)
    mi_st = utils.normalizedMI(orig_cls,
                               orig_cls[perm][matched_st].reshape(-1))

    print('{} {} {} {} {}'.format(n_remove, mi, mi_st, dur_ot, dur_st))
    return mi
def test_random_preimage_2combination():
    """Run ``preimage_random`` on a fixed pair of MUTAG graphs for 11 alphas.

    The Gram matrix is read from
    ``results/gram_matrix_marg_itr10_pq0.03.gm.npz``; its two rows/columns at
    indices ``len(Gn)`` and ``len(Gn) + 1`` are overwritten with the entries
    of graphs 187 and 167, so the stored matrix must be at least
    ``len(Gn) + 2`` wide. For every alpha in ``linspace(0, 1, 11)`` the best
    pre-image is computed, then drawn and saved under
    ``results/random_preimage/``.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    r_max = 10  # iteration limit for pre-image.
    update_limit = 500  # passed positionally as ``l`` to preimage_random.
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors

    # Fixed pair of molecules. The seed is kept from the random-selection
    # variant; NOTE(review): preimage_random may also rely on it - confirm.
    np.random.seed(1)
    idx1, idx2 = 187, 167
    g1 = Gn[idx1].copy()
    g2 = Gn[idx2].copy()

    # Load the precomputed Gram matrix; the context manager closes the
    # underlying .npz archive once both arrays have been read.
    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']

    # modify mixed gram matrix: indices n and n + 1 become copies of the
    # entries of g1 and g2.
    n = len(Gn)
    km[:n, n] = km[:n, idx1]
    km[:n, n + 1] = km[:n, idx2]
    km[n, :n] = km[:n, idx1]
    km[n + 1, :n] = km[:n, idx2]
    km[n, n] = km[idx1, idx1]
    km[n, n + 1] = km[idx1, idx2]
    km[n + 1, n] = km[idx2, idx1]
    km[n + 1, n + 1] = km[idx2, idx2]

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2],
                                                 [alpha, 1 - alpha],
                                                 range(n, n + 2), km,
                                                 k, r_max, update_limit,
                                                 gkernel)
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for item, dhat, ghat in zip(alpha_range, dis_ks_min_list, g_best):
        print('when alpha is', item, 'the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                with_labels=True)
        # Save before showing: once show() returns the figure may already be
        # closed, in which case savefig() would write an empty image.
        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png',
                    format="PNG")
        plt.show()
        plt.clf()
        print(ghat.nodes(data=True))
        print(ghat.edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
def test_preimage_random_grid_k_median_nb():
    """Grid-search ``preimage_random`` over median-set size and ``k``.

    Works on the MUTAG graphs labelled 1. For every size in
    ``nb_median_range`` a median set is sampled with a fixed seed and a mixed
    Gram matrix (all graphs followed by the median graphs) is assembled from
    the precomputed matrix in
    ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz``. For every
    ``k`` in ``k_range`` the pre-image is computed, drawn to
    ``results/preimage_random/`` and scored with ``ged_median``. All result
    lists are indexed ``[median-size index][k index]`` and printed at the end.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    r_max = 5  # iteration limit for pre-image.
    update_limit = 500  # update limit for random generation
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # number of graphs whose median is computed.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]

    # keep only the graphs labelled 1 (the positive class).
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    n = len(Gn)

    # The stored Gram matrix does not depend on the median set, so it is read
    # once here instead of on every iteration; the context manager closes the
    # .npz archive after both arrays have been extracted.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz'
                 ) as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
    # Upper triangle mirrored onto the lower one, as the element-wise copy did.
    km_base = np.triu(km_tmp[:n, :n]) + np.triu(km_tmp[:n, :n], 1).T

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(n), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # modify mixed gram matrix: rows/columns n .. n + nb_median - 1 are
        # copies of the rows/columns of the sampled median graphs.
        km = np.zeros((n + nb_median, n + nb_median))
        km[:n, :n] = km_base
        km[:n, n:] = km_base[:, idx_rdm]
        km[n:, :n] = km_base[:, idx_rdm].T
        km[n:, n:] = km_base[np.ix_(idx_rdm, idx_rdm)]

        # uniform weights over the median set.
        alpha_range = [1 / nb_median] * nb_median

        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list.append([])
        g_best.append([])

        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat, nb_updated = preimage_random(
                Gn, Gn_median, alpha_range, range(n, n + nb_median), km, k,
                r_max, update_limit, gkernel)

            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat)
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat)
            print('\nnumber of updates of the best graph: ', nb_updated)
            nb_updated_list[idx_nb].append(nb_updated)

            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                    with_labels=True)
            plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()

            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                    ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
          nb_updated_list)
    print('\ntimes:', time_list)
def test_gkiam_2combination():
    """Run ``gk_iam_nearest_multi`` on MUTAG graphs 10 and 11 for each alpha.

    The Gram matrix is read from ``results/gram_matrix.gm.npz``; the two
    graphs of interest are addressed at indices ``len(Gn)`` and
    ``len(Gn) + 1`` of that matrix. For every alpha the candidate pre-images
    are computed, the first one is drawn and saved under ``results/gk_iam/``,
    and the SOD of that alpha's candidates to ``[g1, g2]`` is evaluated with
    ``ged_median``.
    """
    from gk_iam import gk_iam_nearest_multi
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {}
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    r_max = 10  # iteration limit for pre-image.
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 20  # k nearest neighbors
    epsilon = 1e-6
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    c_ei = 1
    c_er = 1
    c_es = 1

    # Fixed pair of molecules. The seed is kept from the random-selection
    # variant; NOTE(review): gk_iam_nearest_multi may rely on it - confirm.
    np.random.seed(1)
    idx_gi = [10, 11]
    g1 = Gn[idx_gi[0]].copy()
    g2 = Gn[idx_gi[1]].copy()

    # Read the precomputed Gram matrix; the context manager closes the
    # underlying .npz archive once both arrays have been read.
    with np.load('results/gram_matrix.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(
            Gn, [g1, g2], [alpha, 1 - alpha],
            range(len(Gn),
                  len(Gn) + 2),
            km,
            k,
            r_max,
            gkernel,
            c_ei=c_ei,
            c_er=c_er,
            c_es=c_es,
            epsilon=epsilon,
            ged_cost=ged_cost,
            ged_method=ged_method,
            saveGXL=saveGXL)
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for idx, item in enumerate(alpha_range):
        print('when alpha is', item, 'the shortest distance is',
              dis_ks_min_list[idx])
        print('one of the possible corresponding pre-images is')
        nx.draw(g_best[idx][0],
                labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
                with_labels=True)
        plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png',
                    format="PNG")
        plt.show()
        # Reset the figure so graphs of different alphas are not overlaid.
        plt.clf()
        print(g_best[idx][0].nodes(data=True))
        print(g_best[idx][0].edges(data=True))

    # compute the corresponding sod in graph space.
    # g_best[idx] is already the list of candidate graphs for this alpha; the
    # previous [g_best[0]] wrapped that list in another list and always used
    # the first alpha regardless of idx.
    for idx, item in enumerate(alpha_range):
        sod_tmp, _ = ged_median(g_best[idx], [g1, g2],
                                ged_cost=ged_cost,
                                ged_method=ged_method,
                                saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each alpha: ',
          dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
def test_gkiam_2combination_all_pairs():
    """Run ``preimage_iam`` on pairs of MUTAG graphs and log the update counts.

    The pair loops currently cover only the pair (187, 167). For each pair the
    two input graphs are drawn, the Gram matrix from
    ``results/gram_matrix_marg_itr10_pq0.03.gm.npz`` is patched so that
    indices ``len(Gn)`` and ``len(Gn) + 1`` hold the entries of the pair, the
    pre-image is computed for every alpha, and the number of updates of the
    first alpha is prepended to ``results/gk_iam/all_pairs/nb_updates.txt``.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {}
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    r_max = 10  # iteration limit for pre-image.
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = False
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False

    n = len(Gn)
    nb_update_mat = np.full((n, n), np.inf)
    log_path = 'results/gk_iam/all_pairs/nb_updates.txt'
    # test on each pair of graphs (restricted to the single pair 187/167).
    for idx1 in range(187, 188):
        for idx2 in range(167, 168):
            g1 = Gn[idx1].copy()
            g2 = Gn[idx2].copy()

            # Draw both input graphs; file names follow the loop indices so
            # that widening the ranges does not overwrite the same two files.
            for g_in, idx_in in ((g1, idx1), (g2, idx2)):
                nx.draw(g_in,
                        labels=nx.get_node_attributes(g_in, 'atom'),
                        with_labels=True)
                plt.savefig('results/gk_iam/all_pairs/mutag' + str(idx_in) +
                            '.png',
                            format="PNG")
                plt.show()
                plt.clf()

            # Reload the Gram matrix for every pair so each run starts from
            # the stored values; the context manager closes the .npz archive.
            with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz'
                         ) as gmfile:
                km = gmfile['gm']
                time_km = gmfile['gmtime']
            # modify mixed gram matrix: indices n and n + 1 become copies of
            # the entries of g1 and g2.
            km[:n, n] = km[:n, idx1]
            km[:n, n + 1] = km[:n, idx2]
            km[n, :n] = km[:n, idx1]
            km[n + 1, :n] = km[:n, idx2]
            km[n, n] = km[idx1, idx1]
            km[n, n + 1] = km[idx1, idx2]
            km[n + 1, n] = km[idx2, idx1]
            km[n + 1, n + 1] = km[idx2, idx2]

            time_list = []
            dis_ks_min_list = []
            sod_gs_list = []
            sod_gs_min_list = []
            nb_updated_list = []
            nb_updated_k_list = []
            g_best = []
            # for each alpha
            for alpha in alpha_range:
                print(
                    '\n-------------------------------------------------------\n'
                )
                print('alpha =', alpha)
                time0 = time.time()
                dhat, ghat_list, sod_ks, nb_updated, nb_updated_k = \
                    preimage_iam(Gn, [g1, g2],
                    [alpha, 1 - alpha], range(n, n + 2), km, k, r_max,
                    gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                                'removeNodes': removeNodes, 'connected': connected_iam},
                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                                'saveGXL': saveGXL})
                time_total = time.time() - time0 + time_km
                print('time: ', time_total)
                time_list.append(time_total)
                dis_ks_min_list.append(dhat)
                g_best.append(ghat_list)
                nb_updated_list.append(nb_updated)
                nb_updated_k_list.append(nb_updated_k)

            # show best graphs and save them to file.
            for idx, item in enumerate(alpha_range):
                print('when alpha is', item, 'the shortest distance is',
                      dis_ks_min_list[idx])
                print('one of the possible corresponding pre-images is')
                nx.draw(g_best[idx][0],
                        labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
                        with_labels=True)
                plt.savefig('results/gk_iam/mutag' + str(idx1) + '_' +
                            str(idx2) + '_alpha' + str(item) + '.png',
                            format="PNG")
                plt.clf()

            # compute the corresponding sod in graph space.
            # g_best[idx] is already the list of candidate graphs for this
            # alpha; the previous [g_best[0]] wrapped that list in another
            # list and always used the first alpha regardless of idx.
            for idx, item in enumerate(alpha_range):
                sod_tmp, _ = ged_median(g_best[idx], [g1, g2],
                                        ged_cost=ged_cost,
                                        ged_method=ged_method,
                                        saveGXL=saveGXL)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))

            print('\nsods in graph space: ', sod_gs_list)
            print('\nsmallest sod in graph space for each alpha: ',
                  sod_gs_min_list)
            print('\nsmallest distance in kernel space for each alpha: ',
                  dis_ks_min_list)
            print('\nnumber of updates of the best graph for each alpha: ',
                  nb_updated_list)
            print(
                '\nnumber of updates of the k nearest graphs for each alpha: ',
                nb_updated_k_list)
            print('\ntimes:', time_list)
            nb_update_mat[idx1, idx2] = nb_updated_list[0]

            # Prepend the newest result to the log. A missing log file is
            # treated as empty instead of aborting after the whole run.
            str_fw = 'graphs %d and %d: %d.\n' % (idx1, idx2,
                                                  nb_updated_list[0])
            try:
                with open(log_path, 'r') as file:
                    content = file.read()
            except FileNotFoundError:
                content = ''
            with open(log_path, 'w') as file:
                file.write(str_fw + content)
def _extend_gram_matrix(km_base, nb_graphs, idx_extra):
    """Return the Gram matrix of ``nb_graphs`` graphs plus duplicated ones.

    The result is a square matrix of size ``nb_graphs + len(idx_extra)``.
    Its upper-left block is taken from ``km_base``; the extra rows/columns
    appended at the end are copies of the rows/columns listed in
    ``idx_extra``, so that entry ``nb_graphs + j`` behaves like graph
    ``idx_extra[j]``.

    Parameters
    ----------
    km_base : array-like
        Precomputed Gram matrix; at least ``nb_graphs`` x ``nb_graphs``.
    nb_graphs : int
        Number of graphs covered by the upper-left block.
    idx_extra : list of int
        Indices (all ``< nb_graphs``) of the graphs to duplicate.

    Returns
    -------
    numpy.ndarray
        The extended Gram matrix.
    """
    size = nb_graphs + len(idx_extra)
    km = np.zeros((size, size))
    base = np.asarray(km_base)[:nb_graphs, :nb_graphs]
    # Only the upper triangle (including the diagonal) of km_base is read;
    # it is mirrored so that the block is symmetric.
    km[:nb_graphs, :nb_graphs] = np.triu(base) + np.triu(base, 1).T
    # Kernel values between every graph and each duplicated graph.
    cross = km[:nb_graphs, idx_extra]
    km[:nb_graphs, nb_graphs:] = cross
    km[nb_graphs:, :nb_graphs] = cross.T
    # Kernel values among the duplicated graphs themselves.
    km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_extra, idx_extra)]
    return km


def test_preimage_iam_median_nb():
    """Run the IAM-based pre-image experiment on MUTAG for median graphs.

    The function loads the MUTAG dataset, calls ``remove_edges`` on it and
    keeps the graphs whose label is 1.  For every value in
    ``nb_median_range`` it samples that many graphs (seed fixed to 1),
    extends a precomputed Gram matrix with copies of the sampled graphs,
    calls ``preimage_iam`` with uniform weights, draws the first returned
    graph and computes its sum of distances to the sampled graphs with
    ``ged_median``.  All results are printed; nothing is returned.

    The Gram matrix is read from
    ``results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz`` (keys
    ``gm`` and ``gmtime``), which must exist beforehand.  It can be produced
    with ``km = compute_kernel(Gn, gkernel, True)`` followed by
    ``np.savez(<path without .npz>, gm=km, gmtime=<elapsed time>)``.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {}
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

    r_max = 3  # iteration limit for pre-image.
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    # parameters for IAM function
    c_vi = 4
    c_vr = 4
    c_vs = 2
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False
    # parameters for GED function
    ged_cost = 'CONSTANT'
    ged_method = 'IPFP'
    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
    ged_stabilizer = 'min'
    ged_repeat = 50
    params_ged = {
        'lib': 'gedlibpy',
        'cost': ged_cost,
        'method': ged_method,
        'edit_cost_constant': edit_cost_constant,
        'stabilizer': ged_stabilizer,
        'repeat': ged_repeat
    }
    # Only the edge costs are forwarded to IAM here; the node costs reach
    # the GED computation through edit_cost_constant.
    params_iam = {
        'c_ei': c_ei,
        'c_er': c_er,
        'c_es': c_es,
        'ite_max': ite_max_iam,
        'epsilon': epsilon_iam,
        'removeNodes': removeNodes,
        'connected': connected_iam
    }

    # number of graphs; we want to compute the median of these graphs.
    #    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    nb_median_range = [2]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The precomputed Gram matrix does not depend on nb_median, so it is
    # loaded once; the context manager closes the underlying .npz file.
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz'
                 ) as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    nb_updated_k_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # Re-seed for every setting so the sampled graphs are reproducible.
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Gram matrix of Gn followed by the copies in Gn_median.
        km = _extend_gram_matrix(km_tmp, nb_graphs, idx_rdm)

        # Uniform weights over the sampled graphs.
        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
            preimage_iam(Gn, Gn_median, alpha_range,
                         range(nb_graphs, nb_graphs + nb_median), km, k,
                         r_max, gkernel, epsilon=epsilon,
                         InitIAMWithAllDk=InitIAMWithAllDk,
                         params_iam=params_iam,
                         params_ged=params_ged)

        # The stored Gram-matrix computation time is counted as well.
        time_total = time.time() - time0 + time_km
        print('\ntime: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)
        print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
        nb_updated_k_list.append(nb_updated_k)

        # show the best graph.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0],
                labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                with_labels=True)
        plt.show()
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]],
                                Gn_median,
                                params_ged=params_ged)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ',
          sod_gs_min_list)
    print(
        '\nsmallest distance in kernel space for each set of median graphs: ',
        dis_ks_min_list)
    print(
        '\nnumber of updates of the best graph for each set of median graphs by IAM: ',
        nb_updated_list)
    print(
        '\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
        nb_updated_k_list)
    print('\ntimes:', time_list)