def kernelFunction(Data, rowInd, colInd, gamma=0.1, _type_="rbf"):
    """Return a sub-block (or the diagonal) of a kernel matrix.

    Parameters
    ----------
    Data
        Data matrix; forwarded unchanged to ``compute_kernel``.
    rowInd, colInd
        Sequences of row / column indices selecting the requested
        sub-block.  An empty ``colInd`` requests the diagonal entries
        ``K(i, i)`` for ``i`` in ``rowInd`` instead of a sub-block.
    gamma
        Kernel width parameter, forwarded to ``compute_kernel`` as
        ``sigma``.
    _type_
        Name of the kernel.  Only ``"rbf"`` is implemented.

    Returns
    -------
    ``np.ones(len(rowInd))`` when ``colInd`` is empty (an RBF kernel
    evaluates to 1 for a point with itself, so no computation is needed);
    otherwise whatever ``compute_kernel`` returns for the given indices.

    Raises
    ------
    ValueError
        If ``_type_`` is not ``"rbf"``.  The function used to fall through
        and fail with an ``UnboundLocalError`` on ``return`` in that case.
    """
    if _type_ != "rbf":
        raise ValueError(
            "unsupported kernel type %r; only 'rbf' is implemented" % (_type_,))

    if len(colInd) == 0:
        # Diagonal request: skip the kernel evaluation entirely.
        return np.ones(len(rowInd))

    return compute_kernel(Data, rowInd, colInd, kernel_type=_type_, sigma=gamma)
def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method,
                               graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/', 
                               edit_costs=None, group_min=None, dataset='monoterpenoides',
                               cost='CONSTANT', parallel=True):
    """Compute set / generalized medians of a group of graphs and compare
    them with the group members in kernel space.

    Parameters
    ----------
    Gn
        Sequence of graphs; each selected graph must provide ``copy()`` and
        ``graph['filename']``.
    node_label, edge_label, gkernel
        Forwarded to ``fit_GED_to_kernel_distance`` (``gkernel`` also to
        ``compute_kernel``).
    k
        Not read by this function (kept for interface compatibility).
    fit_method
        How the edit costs are obtained: ``'random'``, ``'expert'``,
        ``'k-graphs'`` (fit on the selected group), ``'whole-dataset'``
        (fit on all of ``Gn``) or ``'precomputed'`` (use ``edit_costs``).
    graph_dir
        Directory passed to ``iam_bash``.
    edit_costs
        Edit costs used when ``fit_method == 'precomputed'``.
    group_min
        Indices into ``Gn`` of the graphs whose median is computed.
        Required: the code that used to select them automatically is
        disabled.
    dataset
        Dataset name; lower-cased before use.  ``'letter'`` triggers
        ``reform_attributes`` and label-free kernel computation.
    cost
        Edit cost name (``'LETTER'``, ``'LETTER2'`` or anything else for the
        six-constant default).
    parallel
        Not read by this function; the fitting call always runs with
        ``parallel=True``.

    Returns
    -------
    tuple
        ``(sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min,
        idx)`` where ``sod_sm`` / ``sod_gm`` are the values reported by
        ``iam_bash`` for the set and generalized median, ``dis_k_sm`` /
        ``dis_k_gm`` are their kernel-space distances to the group,
        ``dis_k_gi`` lists the same distance for every group member,
        ``dis_k_gi_min`` is the smallest of those and ``idx`` is the entry
        of ``group_min`` that attains it.

    Raises
    ------
    ValueError
        If ``group_min`` is ``None`` or ``fit_method`` is not recognised.
    """
    dataset = dataset.lower()

    if group_min is None:
        # Previously this surfaced as "TypeError: 'NoneType' object is not
        # iterable" a few lines further down.
        raise ValueError('group_min must be provided: automatic selection of '
                         'the k closest graphs is disabled.')

    Gn_median = [Gn[g].copy() for g in group_min]

    # fit edit costs.
    if fit_method == 'random':
        if cost == 'LETTER':
            edit_cost_constant = [item * 0.1
                                  for item in random.sample(range(1, 10), 3)]
        elif cost == 'LETTER2':
            random.seed(time.time())
            edit_cost_constant = random.sample(range(1, 10), 5)
        else:
            edit_cost_constant = random.sample(range(1, 10), 6)
        print('edit costs used:', edit_cost_constant)
    elif fit_method == 'expert':
        edit_cost_constant = [3, 3, 1, 3, 3, 1]
    elif fit_method in ('k-graphs', 'whole-dataset'):
        itr_max = 6
        if cost == 'LETTER':
            init_costs = [0.9, 1.7, 0.75]
        elif cost == 'LETTER2':
            init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
        else:
            init_costs = [3, 3, 1, 3, 3, 1]
        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
        params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP',
                      'algo_options': algo_options, 'stabilizer': None}
        # The two methods differ only in the set of graphs the costs are
        # fitted on: the selected group or the whole dataset.
        graphs_to_fit = Gn_median if fit_method == 'k-graphs' else Gn
        edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(
            graphs_to_fit, node_label, edge_label, gkernel, itr_max,
            params_ged=params_ged, init_costs=init_costs, dataset=dataset,
            parallel=True)
    elif fit_method == 'precomputed':
        edit_cost_constant = edit_costs
    else:
        # Without this branch edit_cost_constant stayed unbound and the
        # failure only showed up later, at the iam_bash call.
        raise ValueError('unknown fit_method: %r' % (fit_method,))

    # compute set median and gen median using IAM (C++ through bash).
    group_fnames = [Gn[g].graph['filename'] for g in group_min]
    sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
                                                  cost=cost, graph_dir=graph_dir,
                                                  dataset=dataset)

    # Take fresh copies of the group: the earlier copies were handed to the
    # fitting routine when fit_method == 'k-graphs'.
    Gn_median = [Gn[g].copy() for g in group_min]
    set_median = loadGXL(fname_sm)
    gen_median = loadGXL(fname_gm)
    if dataset == 'letter':
        for g in Gn_median:
            reform_attributes(g)
        reform_attributes(set_median)
        reform_attributes(gen_median)

    nb_median = len(Gn_median)
    idx_median = range(1, 1 + nb_median)  # positions of the group in the Gram matrices
    uniform_alpha = [1 / nb_median] * nb_median
    kernel_node_label = None if dataset == 'letter' else 'chem'
    kernel_edge_label = None if dataset == 'letter' else 'valence'

    # compute distance in kernel space for set median.
    Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel,
                                kernel_node_label, kernel_edge_label, False)
    dis_k_sm = dis_gstar(0, idx_median, uniform_alpha, Kmatrix_sm,
                         withterm3=False)

    # compute distance in kernel space for generalized median.
    Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel,
                                kernel_node_label, kernel_edge_label, False)
    dis_k_gm = dis_gstar(0, idx_median, uniform_alpha, Kmatrix_gm,
                         withterm3=False)

    # compute distance in kernel space for each graph in median set.
    dis_k_gi = [dis_gstar(idx + 1, idx_median, uniform_alpha, Kmatrix_gm,
                          withterm3=False)
                for idx in range(nb_median)]

    print('sod_sm:', sod_sm)
    print('sod_gm:', sod_gm)
    print('dis_k_sm:', dis_k_sm)
    print('dis_k_gm:', dis_k_gm)
    print('dis_k_gi:', dis_k_gi)
    idx_dis_k_gi_min = np.argmin(dis_k_gi)
    dis_k_gi_min = dis_k_gi[idx_dis_k_gi_min]
    print('index min dis_k_gi:', group_min[idx_dis_k_gi_min])
    print('min dis_k_gi:', dis_k_gi_min)

    return sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, group_min[idx_dis_k_gi_min]
# Example #3
# 0
def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, 
                 gkernel, epsilon=0.001, InitIAMWithAllDk=False,
                 params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, 
                             'ite_max': 50, 'epsilon': 0.001, 
                             'removeNodes': True, 'connected': False},
                 params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP', 
                             'edit_cost_constant': [], 'stabilizer': 'min', 
                             'repeat': 50}):
    """This function constructs graph pre-image by the iterative pre-image 
    framework in reference [1], algorithm 1, where the step of generating new 
    graphs randomly is replaced by the IAM algorithm in reference [2].

    Parameters
    ----------
    Gn_init
        Candidate graphs; the k nearest to the target form the initial D_k.
    Gn_median
        Graphs the target point is a combination of; appended to the newly
        generated graphs when their Gram matrix is computed.
    alpha
        Combination weights, one per entry of ``idx_gi``.
    idx_gi
        Indices into ``Kmatrix`` of the graphs weighted by ``alpha``.
    Kmatrix
        Gram matrix indexed as ``Kmatrix[i, j]``.
    k
        Size of the nearest-neighbour set D_k.
    r_max
        Number of consecutive iterations without an accepted new neighbour
        after which the search stops.
    gkernel
        Forwarded to ``compute_kernel``.
    epsilon
        Distances closer than this are treated as equal.
    InitIAMWithAllDk
        If true, IAM is run once per graph of D_k, each time initialised
        with that single graph; otherwise it is run once, initialised with
        all graphs of D_k.
    params_iam, params_ged
        Forwarded to ``iam_upgraded``.  The default dicts are shared between
        calls; this function itself never assigns into them.

    Returns
    -------
    tuple
        ``(dhat, ghat_list, last_dis, nb_updated, nb_updated_k)``: the
        smallest distance found, the graph(s) attaining it, the last entry
        of the per-iteration distance history, and how often the best graph
        and the k nearest neighbours were updated.  If a graph of
        ``Gn_init`` is at distance 0 the result is
        ``(0, ghat_list, 0, 0, 0)``.

    notes
    -----
    Every time a set of n better graphs is acquired, their distances in kernel space are
    compared with the k nearest ones, and the k nearest distances from the k+n
    distances will be used as the new ones.
    """
    # compute k nearest neighbors of phi in DN.
    # term3 does not depend on the candidate graph, so it is computed once
    # and handed to dis_gstar for every candidate.
    term3 = 0
    for i1, a1 in enumerate(alpha):
        for i2, a2 in enumerate(alpha):
            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    dis_all = [] # distance between g_star and each graph.
    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
        dis_all.append(dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3))

    # sort
    sort_idx = np.argsort(dis_all)
    dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
    # Count ties with the best distance element by element.  Comparing the
    # list itself with a scalar is only elementwise when the scalar is a
    # NumPy type; with plain floats it is simply False and no graph at all
    # would be selected.
    nb_best = sum(1 for dis in dis_k if dis == dis_k[0])
    ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
    if dis_k[0] == 0: # the exact pre-image.
        print('The exact pre-image is found from the input dataset.')
        # Same arity as the regular return below.
        return 0, ghat_list, 0, 0, 0
    dhat = dis_k[0] # the nearest distance
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors

    r = 0
    itr_total = 0
    dis_of_each_itr = [dhat]
    nb_updated = 0
    nb_updated_k = 0
    while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
        print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
        print('Current preimage iteration =', r)
        print('Total preimage iteration =', itr_total, '\n')
        found = False

        Gn_nearest_median = [g.copy() for g in Gk]
        if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
            ghat_new_list = []
            for g_tmp in Gk:
                Gn_nearest_init = [g_tmp.copy()]
                ghat_new_list_tmp, _, _ = iam_upgraded(Gn_nearest_median, 
                        Gn_nearest_init, params_ged=params_ged, **params_iam)
                ghat_new_list += ghat_new_list_tmp
        else: # a single IAM run, initialized with all graphs in D_k.
            Gn_nearest_init = [g.copy() for g in Gk]
            ghat_new_list, _, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init, 
                    params_ged=params_ged, **params_iam)

        # compute distance between \psi and the new generated graphs.
        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
        nb_new = len(ghat_new_list)
        # In knew the median graphs follow the nb_new generated graphs, so
        # they occupy exactly the indices nb_new .. nb_new+len(Gn_median)-1.
        idx_median = range(nb_new, nb_new + len(Gn_median))
        # @todo: the term3 below could use the one at the beginning of the function.
        dhat_new_list = [dis_gstar(idx, idx_median, alpha, knew, withterm3=False)
                         for idx in range(nb_new)]

        for idx_g, ghat_new in enumerate(ghat_new_list):
            dhat_new = dhat_new_list[idx_g]

            # only distances clearly below the current max of D_k qualify.
            if not (dhat_new < dis_k[-1] 
                    and np.abs(dhat_new - dis_k[-1]) >= epsilon):
                continue

            # check if the new distance is the same as one in D_k.  The
            # first entry is covered by the comparison with dhat below, the
            # last one by the test above.
            if any(np.abs(dhat_new - dis_tmp) < epsilon 
                   for dis_tmp in dis_k[1:-1]):
                print('IAM: duplicate k nearest graph generated.')
                continue

            if np.abs(dhat_new - dhat) < epsilon:
                print('IAM: I am equal!')
                continue

            print('IAM: we got better k nearest neighbors!')
            nb_updated_k += 1
            print('the k nearest neighbors are updated', 
                  nb_updated_k, 'times.')

            dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
            Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
            sort_idx = np.argsort(dis_k)
            dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
            Gk = [Gk[idx] for idx in sort_idx[0:k]]
            if dhat_new < dhat:
                print('IAM: I have smaller distance!')
                print(str(dhat) + '->' + str(dhat_new))
                dhat = dhat_new
                ghat_list = [Gk[0].copy()]
                r = 0 # progress was made: restart the patience counter.
                nb_updated += 1

                print('the graph is updated', nb_updated, 'times.')                       
                nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'), 
                    with_labels=True)
                plt.show()

            found = True

        if not found:
            r += 1            

        dis_of_each_itr.append(dhat)
        itr_total += 1
        print('\nthe k shortest distances are', dis_k)
        print('the shortest distances for previous iterations are', dis_of_each_itr)

    print('\n\nthe graph is updated', nb_updated, 'times.')
    print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
    print('distances in kernel space:', dis_of_each_itr, '\n')

    return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
# Example #4
# 0
def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, 
                            l_max, gkernel, epsilon=0.001, 
                            InitIAMWithAllDk=False, InitRandomWithAllDk=True,
                            params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, 
                                        'ite_max': 50, 'epsilon': 0.001, 
                                        'removeNodes': True, 'connected': False},
                            params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 
                                        'method': 'IPFP', 'edit_cost_constant': [], 
                                        'stabilizer': 'min', 'repeat': 50}):
    """This function constructs graph pre-image by the iterative pre-image 
    framework in reference [1], algorithm 1, where new graphs are generated 
    randomly and by the IAM algorithm in reference [2].

    Each outer iteration first runs IAM on the current k nearest graphs.
    If none of the IAM results enters the k nearest set, up to ``l_max``
    rounds of random edge flips on graphs of that set are tried instead.

    Parameters
    ----------
    Gn_init
        Candidate graphs (relabelled to integer node labels on entry); the
        k nearest to the target form the initial D_k.
    Gn_median
        Graphs the target point is a combination of; appended to the newly
        generated graphs when their Gram matrix is computed.
    alpha
        Combination weights, one per entry of ``idx_gi``.
    idx_gi
        Indices into ``Kmatrix`` of the graphs weighted by ``alpha``.
    Kmatrix
        Gram matrix indexed as ``Kmatrix[i, j]``.
    k
        Size of the nearest-neighbour set D_k.
    r_max
        Number of consecutive unsuccessful outer iterations after which the
        search stops.
    l_max
        Maximum number of random-generation rounds per outer iteration.
    gkernel
        Forwarded to ``compute_kernel``.
    epsilon
        Distances closer than this are treated as equal.
    InitIAMWithAllDk
        If true, IAM is run once per graph of D_k, each time initialised
        with that single graph; otherwise it is run once, initialised with
        all graphs of D_k.
    InitRandomWithAllDk
        If true, random generation perturbs every graph of D_k; otherwise
        only the nearest one.
    params_iam, params_ged
        Forwarded to ``iam_upgraded``.  The default dicts are shared between
        calls; this function itself never assigns into them.

    Returns
    -------
    tuple
        ``(dhat, ghat_list, last_dis, nb_updated_iam, nb_updated_random,
        nb_updated_k_iam, nb_updated_k_random)``.  If a graph of
        ``Gn_init`` is at distance 0 the result is
        ``(0, ghat_list, 0, 0, 0, 0, 0)``.

    notes
    -----
    Every time a set of n better graphs is acquired, their distances in kernel space are
    compared with the k nearest ones, and the k nearest distances from the k+n
    distances will be used as the new ones.
    """
    Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
    # compute k nearest neighbors of phi in DN.
    # term3 does not depend on the candidate graph, so it is computed once
    # and handed to dis_gstar for every candidate.
    term3 = 0
    for i1, a1 in enumerate(alpha):
        for i2, a2 in enumerate(alpha):
            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    dis_all = [] # distance between g_star and each graph.
    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
        dis_all.append(dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3))

    # sort
    sort_idx = np.argsort(dis_all)
    dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
    # Count ties with the best distance element by element.  Comparing the
    # list itself with a scalar is only elementwise when the scalar is a
    # NumPy type; with plain floats it is simply False and no graph at all
    # would be selected.
    nb_best = sum(1 for dis in dis_k if dis == dis_k[0])
    ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of psi in DN
    if dis_k[0] == 0: # the exact pre-image.
        print('The exact pre-image is found from the input dataset.')
        # Same arity as the regular return below.
        return 0, ghat_list, 0, 0, 0, 0, 0
    dhat = dis_k[0] # the nearest distance
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors

    r = 0
    itr_total = 0
    dis_of_each_itr = [dhat]
    nb_updated_iam = 0
    nb_updated_k_iam = 0
    nb_updated_random = 0
    nb_updated_k_random = 0
    while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
        print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
        print('Current preimage iteration =', r)
        print('Total preimage iteration =', itr_total, '\n')
        found_iam = False

        Gn_nearest_median = [g.copy() for g in Gk]
        if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
            ghat_new_list = []
            for g_tmp in Gk:
                Gn_nearest_init = [g_tmp.copy()]
                ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median, 
                        Gn_nearest_init, params_ged=params_ged, **params_iam)
                ghat_new_list += ghat_new_list_tmp
        else: # a single IAM run, initialized with all graphs in D_k.
            Gn_nearest_init = [g.copy() for g in Gk]
            ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init, 
                    params_ged=params_ged, **params_iam)

        # compute distance between \psi and the new generated graphs.
        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
        nb_new = len(ghat_new_list)
        # In knew the median graphs follow the nb_new generated graphs, so
        # they occupy exactly the indices nb_new .. nb_new+len(Gn_median)-1.
        idx_median = range(nb_new, nb_new + len(Gn_median))
        # @todo: the term3 below could use the one at the beginning of the function.
        dhat_new_list = [dis_gstar(idx, idx_median, alpha, knew, withterm3=False)
                         for idx in range(nb_new)]

        # find the new k nearest graphs. 
        for idx_g, ghat_new in enumerate(ghat_new_list):
            dhat_new = dhat_new_list[idx_g]

            # only distances clearly below the current max of D_k qualify.
            if not (dhat_new < dis_k[-1] 
                    and np.abs(dhat_new - dis_k[-1]) >= epsilon):
                continue

            # check if the new distance is the same as one in D_k.  The
            # first entry is covered by the comparison with dhat below, the
            # last one by the test above.
            if any(np.abs(dhat_new - dis_tmp) < epsilon 
                   for dis_tmp in dis_k[1:-1]):
                print('IAM: duplicate k nearest graph generated.')
                continue

            if np.abs(dhat_new - dhat) < epsilon:
                print('IAM: I am equal!')
                continue

            print('IAM: we got better k nearest neighbors!')
            nb_updated_k_iam += 1
            print('the k nearest neighbors are updated', 
                  nb_updated_k_iam, 'times.')

            dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
            Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
            sort_idx = np.argsort(dis_k)
            dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
            Gk = [Gk[idx] for idx in sort_idx[0:k]]
            if dhat_new < dhat:
                print('IAM: I have smaller distance!')
                print(str(dhat) + '->' + str(dhat_new))
                dhat = dhat_new
                ghat_list = [Gk[0].copy()]
                r = 0 # progress was made: restart the patience counter.
                nb_updated_iam += 1

                print('the graph is updated by IAM', nb_updated_iam, 
                      'times.')                       
                nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'), 
                    with_labels=True)
                plt.show()

            found_iam = True

        # when new distance is not smaller than the max of D_k, use random generation.
        if not found_iam:
            print('Distance not better, switching to random generation now.')
            # Report the last IAM distance of this iteration, if any.  When
            # IAM produced no graph there is nothing to report (the
            # variable used to be unbound or stale in that case).
            if dhat_new_list:
                print(str(dhat) + '->' + str(dhat_new_list[-1]))

            if InitRandomWithAllDk: # use all k nearest graphs as the initials.
                init_list = [g_init.copy() for g_init in Gk]
            else: # use just the nearest graph as the initial.
                init_list = [Gk[0].copy()]

            # number of edges to be changed.
            if len(init_list) == 1:
                # @todo what if the log is negetive? how to choose alpha (scalar)? seems fdgs is always 1.
                fdgs = nb_updated_random + 1
                if fdgs < 1:
                    fdgs = 1
                fdgs = int(np.ceil(np.log(fdgs)))
                if fdgs < 1:
                    fdgs += 1
                fdgs_list = [fdgs]
            else:
                # @todo what if the log is negetive? how to choose alpha (scalar)?
                # float dtype so that the in-place division below is valid
                # whatever the type of the stored distances.
                fdgs_list = np.array(dis_k[:], dtype=float)
                if np.min(fdgs_list) < 1:
                    fdgs_list /= dis_k[0]
                fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
                if np.min(fdgs_list) < 1:
                    fdgs_list = np.array(fdgs_list) + 1

            l = 0
            found_random = False
            while l < l_max and not found_random:
                for idx_g, g_tmp in enumerate(init_list):
                    # add and delete edges.
                    ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
                    nb_nodes = nx.number_of_nodes(ghat_new)
                    # @todo: should we use just half of the adjacency matrix for undirected graphs?
                    nb_vpairs = nb_nodes * (nb_nodes - 1)
                    np.random.seed()
                    # which edges to change; never ask for more pairs than exist.
                    idx_change = random.sample(range(nb_vpairs), 
                                               min(fdgs_list[idx_g], nb_vpairs))
                    for item in idx_change:
                        # decode the flat index into an ordered node pair.
                        node1, node2 = divmod(item, nb_nodes - 1)
                        if node2 >= node1: # skip the self pair.
                            node2 += 1
                        # @todo: is the randomness correct?
                        if not ghat_new.has_edge(node1, node2):
                            ghat_new.add_edge(node1, node2)
                        else:
                            ghat_new.remove_edge(node1, node2)

                    # compute distance between \psi and the new generated graph.
                    knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
                    dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1), 
                                         alpha, knew, withterm3=False)
                    # @todo: the new distance is smaller or also equal?
                    if not (dhat_new < dis_k[-1] 
                            and np.abs(dhat_new - dis_k[-1]) >= epsilon):
                        continue

                    # check if the new distance is the same as one in D_k.
                    if any(np.abs(dhat_new - dis_tmp) < epsilon 
                           for dis_tmp in dis_k[1:-1]):
                        print('Random: duplicate k nearest graph generated.')
                        continue

                    if np.abs(dhat_new - dhat) < epsilon:
                        print('Random: I am equal!')
                        continue

                    print('Random: we got better k nearest neighbors!')
                    print('l =', str(l))
                    nb_updated_k_random += 1
                    print('the k nearest neighbors are updated by random generation', 
                              nb_updated_k_random, 'times.')

                    dis_k = [dhat_new] + dis_k # add the new nearest distances.
                    Gk = [ghat_new.copy()] + Gk # add the corresponding graphs.
                    sort_idx = np.argsort(dis_k)
                    dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
                    Gk = [Gk[idx] for idx in sort_idx[0:k]]
                    if dhat_new < dhat:
                        print('\nRandom: I am smaller!')
                        print('l =', str(l))
                        print(dhat, '->', dhat_new)                       
                        dhat = dhat_new
                        ghat_list = [ghat_new.copy()]
                        r = 0 # progress was made: restart the patience counter.
                        nb_updated_random += 1

                        print('the graph is updated by random generation', 
                              nb_updated_random, 'times.')

                        nx.draw(ghat_new, labels=nx.get_node_attributes(ghat_new, 'atom'), 
                            with_labels=True)
                        plt.show()
                    found_random = True
                    break
                l += 1
            if not found_random: # l == l_max:
                r += 1            

        dis_of_each_itr.append(dhat)
        itr_total += 1
        print('\nthe k shortest distances are', dis_k)
        print('the shortest distances for previous iterations are', dis_of_each_itr)

    print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
          nb_updated_random, 'times.')
    print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam, 
          'times, and by random generation', nb_updated_k_random, 'times.')
    print('distances in kernel space:', dis_of_each_itr, '\n')

    return dhat, ghat_list, dis_of_each_itr[-1], \
            nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random


###############################################################################
# Old implementations.
    
#def gk_iam(Gn, alpha):
#    """This function constructs graph pre-image by the iterative pre-image 
#    framework in reference [1], algorithm 1, where the step of generating new 
#    graphs randomly is replaced by the IAM algorithm in reference [2].
#    
#    notes
#    -----
#    Every time a better graph is acquired, the older one is replaced by it.
#    """
#    pass
#    # compute k nearest neighbors of phi in DN.
#    dis_list = [] # distance between g_star and each graph.
#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
#        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * 
#                      k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * 
#                      (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * 
#                      k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
#        dis_list.append(dtemp)
#        
#    # sort
#    sort_idx = np.argsort(dis_list)
#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
#    g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
#    if dis_gs[0] == 0: # the exact pre-image.
#        print('The exact pre-image is found from the input dataset.')
#        return 0, g0hat
#    dhat = dis_gs[0] # the nearest distance
#    Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
#    gihat_list = []
#    
##    i = 1
#    r = 1
#    while r < r_max:
#        print('r =', r)
##        found = False
#        Gs_nearest = Gk + gihat_list
#        g_tmp = iam(Gs_nearest)
#        
#        # compute distance between \psi and the new generated graph.
#        knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
#                       p_quit=lmbda, n_iteration=20, remove_totters=False,
#                       n_jobs=multiprocessing.cpu_count(), verbose=False)
#        dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) * 
#              knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha * 
#              (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * 
#              k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
#        if dnew <= dhat: # the new distance is smaller
#            print('I am smaller!')
#            dhat = dnew
#            g_new = g_tmp.copy() # found better graph.
#            gihat_list = [g_new]
#            dis_gs.append(dhat)
#            r = 0
#        else:
#            r += 1
#            
#    ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
#    
#    return dhat, ghat


#def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
#    """This function constructs graph pre-image by the iterative pre-image 
#    framework in reference [1], algorithm 1, where the step of generating new 
#    graphs randomly is replaced by the IAM algorithm in reference [2].
#    
#    notes
#    -----
#    Every time a better graph is acquired, its distance in kernel space is
#    compared with the k nearest ones, and the k nearest distances from the k+1
#    distances will be used as the new ones.
#    """
#    # compute k nearest neighbors of phi in DN.
#    dis_list = [] # distance between g_star and each graph.
#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
#        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
##        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * 
##                      k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * 
##                      (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * 
##                      k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
#        dis_list.append(dtemp)
#        
#    # sort
#    sort_idx = np.argsort(dis_list)
#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
#    g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
#    if dis_gs[0] == 0: # the exact pre-image.
#        print('The exact pre-image is found from the input dataset.')
#        return 0, g0hat
#    dhat = dis_gs[0] # the nearest distance
#    ghat = g0hat.copy()
#    Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
#    for gi in Gk:
#        nx.draw_networkx(gi)
#        plt.show()
#        print(gi.nodes(data=True))
#        print(gi.edges(data=True))
#    Gs_nearest = Gk.copy()
##    gihat_list = []
#    
##    i = 1
#    r = 1
#    while r < r_max:
#        print('r =', r)
##        found = False
##        Gs_nearest = Gk + gihat_list
##        g_tmp = iam(Gs_nearest)
#        g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
#        nx.draw_networkx(g_tmp)
#        plt.show()
#        print(g_tmp.nodes(data=True))
#        print(g_tmp.edges(data=True))
#        
#        # compute distance between \psi and the new generated graph.
#        gi_list = [Gn[i] for i in idx_gi]
#        knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
#        dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
#        
##        dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * 
##              knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * 
##              alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * 
##              k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
#        if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
#            print('I am smaller!')
#            print(str(dhat) + '->' + str(dnew))
##            nx.draw_networkx(ghat)
##            plt.show()
##            print('->')
##            nx.draw_networkx(g_tmp)
##            plt.show()
#            
#            dhat = dnew
#            g_new = g_tmp.copy() # found better graph.
#            ghat = g_tmp.copy()
#            dis_gs.append(dhat) # add the new nearest distance.
#            Gs_nearest.append(g_new) # add the corresponding graph.
#            sort_idx = np.argsort(dis_gs)
#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
#            r = 0
#        else:
#            r += 1
#    
#    return dhat, ghat


#def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
#    """This function constructs graph pre-image by the iterative pre-image 
#    framework in reference [1], algorithm 1, where the step of generating new 
#    graphs randomly is replaced by the IAM algorithm in reference [2].
#    
#    notes
#    -----
#    Every time a set of n better graphs is acquired, their distances in kernel space are
#    compared with the k nearest ones, and the k nearest distances from the k+n
#    distances will be used as the new ones.
#    """
#    Gn_median = [Gn[idx].copy() for idx in idx_gi]
#    # compute k nearest neighbors of phi in DN.
#    dis_list = [] # distance between g_star and each graph.
#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
#        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
##        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * 
##                      k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * 
##                      (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * 
##                      k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
#        dis_list.append(dtemp)
#        
#    # sort
#    sort_idx = np.argsort(dis_list)
#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
#    nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
#    g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
#    if dis_gs[0] == 0: # the exact pre-image.
#        print('The exact pre-image is found from the input dataset.')
#        return 0, g0hat_list
#    dhat = dis_gs[0] # the nearest distance
#    ghat_list = [g.copy() for g in g0hat_list]
#    for g in ghat_list:
#        nx.draw_networkx(g)
#        plt.show()
#        print(g.nodes(data=True))
#        print(g.edges(data=True))
#    Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
#    for gi in Gk:
#        nx.draw_networkx(gi)
#        plt.show()
#        print(gi.nodes(data=True))
#        print(gi.edges(data=True))
#    Gs_nearest = Gk.copy()
##    gihat_list = []
#    
##    i = 1
#    r = 1
#    while r < r_max:
#        print('r =', r)
##        found = False
##        Gs_nearest = Gk + gihat_list
##        g_tmp = iam(Gs_nearest)
#        g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
#                Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
#        for g in g_tmp_list:
#            nx.draw_networkx(g)
#            plt.show()
#            print(g.nodes(data=True))
#            print(g.edges(data=True))
#        
#        # compute distance between \psi and the new generated graphs.
#        gi_list = [Gn[i] for i in idx_gi]
#        knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
#        dnew_list = []
#        for idx, g_tmp in enumerate(g_tmp_list):
#            dnew_list.append(dis_gstar(idx, range(len(g_tmp_list), 
#                            len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
#        
##        dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * 
##              knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * 
##              alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * 
##              k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
#            
#        # find the new k nearest graphs.
#        dis_gs = dnew_list + dis_gs # add the new nearest distances.
#        Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
#        sort_idx = np.argsort(dis_gs)
#        if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
#            print('We got better k nearest neighbors! Hurray!')
#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
#            print(dis_gs[-1])
#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
#            nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
#            if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
#                print('I have smaller or equal distance!')
#                dhat = dis_gs[0]
#                print(str(dhat) + '->' + str(dhat))
#                idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
#                ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
#                for g in ghat_list:
#                    nx.draw_networkx(g)
#                    plt.show()
#                    print(g.nodes(data=True))
#                    print(g.edges(data=True))
#            r = 0
#        else:
#            r += 1
#    
#    return dhat, ghat_list
Exemple #5
0
def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l,
                    gkernel):
    """Search for a graph pre-image by randomly toggling edges.

    The target is the ``alpha``-weighted combination (in kernel space) of the
    graphs indexed by ``idx_gi``.  The ``k`` graphs of ``Gn_init`` closest to
    that target are used as starting candidates.  In every round each
    candidate is perturbed ``l`` times by adding/removing randomly chosen
    edges; a perturbed graph is accepted when its distance to the target is
    not larger than the best distance seen so far.

    Parameters
    ----------
    Gn_init : iterable of graphs
        Candidate graphs.  Each one is passed through
        ``nx.convert_node_labels_to_integers`` before use.
    Gn_median : list of graphs
        Graphs appended after every trial graph when calling
        ``compute_kernel`` to evaluate the trial's distance.
    alpha : sequence of numbers
        Combination weights, iterated in step with ``idx_gi``.
    idx_gi : sequence of int
        Indices into ``Kmatrix`` of the graphs being combined.
    Kmatrix : 2-D array indexed as ``Kmatrix[i, j]``
        Kernel values used for the initial distances.  Presumably row ``i``
        corresponds to ``Gn_init[i]`` -- TODO confirm against the caller.
    k : int
        Number of nearest graphs kept as starting candidates.
    r_max : int
        The loop ends after ``r_max`` consecutive rounds in which no trial
        graph was accepted.  Ties (``dnew == dhat``) count as accepted and
        reset that counter.
    l : int
        Number of random perturbations generated per candidate per round.
    gkernel
        Kernel identifier forwarded unchanged to ``compute_kernel``.

    Returns
    -------
    tuple
        ``(dhat, g_best, nb_updated)``: the smallest distance found, the
        corresponding graph (the nearest graph of ``Gn_init`` when no trial
        was ever accepted) and the number of strict improvements.  When the
        nearest graph of ``Gn_init`` has distance exactly 0 the function
        returns ``(0, that_graph, 0)`` immediately.
    """
    Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]

    # term3 does not depend on the candidate graph, so compute it once and
    # hand it to dis_gstar for every candidate.
    term3 = 0
    for i1, a1 in enumerate(alpha):
        for i2, a2 in enumerate(alpha):
            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]

    # distance between the target and each graph of Gn_init.
    dis_list = []
    for ig, g in tqdm(enumerate(Gn_init),
                      desc='computing distances',
                      file=sys.stdout):
        dis_list.append(dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3))

    # keep the k nearest graphs and their distances.
    sort_idx = np.argsort(dis_list)
    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
    # Only the single nearest graph is needed as the fallback result, so it
    # is taken directly.  Counting ties with ``dis_gs == dis_gs[0]`` is
    # avoided on purpose: comparing a Python list with a scalar is only
    # elementwise when the elements are NumPy scalars; with plain floats it
    # is just ``False`` and would yield an empty tie list.
    g0hat = Gn_init[sort_idx[0]]
    if dis_gs[0] == 0:  # the exact pre-image.
        print('The exact pre-image is found from the input dataset.')
        return 0, g0hat, 0
    dhat = dis_gs[0]  # the nearest distance
    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]
    Gs_nearest = [g.copy() for g in Gk]
    gihat_list = []  # best accepted graph so far (at most one element)
    dihat_list = []  # its distance

    r = 0  # number of consecutive rounds without an accepted graph
    dis_of_each_itr = [dhat]
    nb_updated = 0
    while r < r_max:
        print('\nr =', r)
        print('itr for gk =', nb_updated, '\n')
        found = False
        dis_bests = dis_gs + dihat_list
        # Number of edge toggles per candidate: ceil(log(distance)), after
        # rescaling by the smallest distance when that one is below 1, and
        # shifted by one when the smallest count would be below 1.
        # @todo what if the log is negative? how to choose alpha (scalar)?
        fdgs_list = np.array(dis_bests)
        if np.min(fdgs_list) < 1:
            fdgs_list /= np.min(dis_bests)
        fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
        if np.min(fdgs_list) < 1:
            fdgs_list = np.array(fdgs_list) + 1

        for ig, gs in enumerate(Gs_nearest + gihat_list):
            nb_nodes = nx.number_of_nodes(gs)
            # ordered node pairs without self pairs.
            # @todo: should we use just half of the adjacency matrix for
            # undirected graphs?
            nb_vpairs = nb_nodes * (nb_nodes - 1)
            # @todo: what if fdgs is bigger than nb_vpairs?
            nb_changes = (fdgs_list[ig]
                          if fdgs_list[ig] < nb_vpairs else nb_vpairs)
            for trail in range(0, l):
                gtemp = gs.copy()
                # NOTE(review): this reseeds NumPy's global RNG, while the
                # sampling below uses the ``random`` module -- confirm
                # whether the reseed is still wanted.
                np.random.seed()
                idx_change = random.sample(range(nb_vpairs), nb_changes)
                for item in idx_change:
                    # decode the flat pair index into (node1, node2).
                    node1 = int(item / (nb_nodes - 1))
                    node2 = item - node1 * (nb_nodes - 1)
                    if node2 >= node1:  # skip the self pair.
                        node2 += 1
                    # @todo: is the randomness correct?
                    if not gtemp.has_edge(node1, node2):
                        gtemp.add_edge(node1, node2)
                    else:
                        gtemp.remove_edge(node1, node2)

                # compute distance between \psi and the new generated graph.
                knew = compute_kernel([gtemp] + Gn_median,
                                      gkernel,
                                      verbose=False)
                dnew = dis_gstar(0,
                                 range(1,
                                       len(Gn_median) + 1),
                                 alpha,
                                 knew,
                                 withterm3=False)
                if dnew <= dhat:  # @todo: the new distance is smaller or also equal?
                    if dnew < dhat:
                        print('\nI am smaller!')
                        print('ig =', str(ig), ', l =', str(trail))
                        print(dhat, '->', dnew)
                        nb_updated += 1
                    elif dnew == dhat:
                        print('I am equal!')
                    dhat = dnew
                    gnew = gtemp.copy()
                    found = True  # found better graph.
        if found:
            r = 0
            gihat_list = [gnew]
            dihat_list = [dhat]
        else:
            r += 1

        dis_of_each_itr.append(dhat)
        print('the shortest distances for previous iterations are',
              dis_of_each_itr)
    g_best = (g0hat if len(gihat_list) == 0 else gihat_list[0])
    print('distances in kernel space:', dis_of_each_itr, '\n')

    return dhat, g_best, nb_updated
Exemple #6
0
    idx1 = 0
    idx2 = 6
    g1 = DN[idx1]
    g2 = DN[idx2]

    # compute
    k_list = []  # kernel between each graph and itself.
    k_g1_list = []  # kernel between each graph and g1
    k_g2_list = []  # kernel between each graph and g2
    for ig, g in tqdm(enumerate(DN),
                      desc='computing self kernels',
                      file=sys.stdout):
        #    ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
        #                               p_quit=lmbda, n_iteration=20, remove_totters=False,
        #                               n_jobs=multiprocessing.cpu_count(), verbose=False)
        ktemp = compute_kernel([g, g1, g2], 'untilhpathkernel', verbose=False)
        k_list.append(ktemp[0, 0])
        k_g1_list.append(ktemp[0, 1])
        k_g2_list.append(ktemp[0, 2])

    g_best = []
    dis_best = []
    # for each alpha
    for alpha in alpha_range:
        print('alpha =', alpha)
        # compute k nearest neighbors of phi in DN.
        dis_list = []  # distance between g_star and each graph.
        for ig, g in tqdm(enumerate(DN),
                          desc='computing distances',
                          file=sys.stdout):
            dtemp = k_list[ig] - 2 * (
Exemple #7
0
def test_iam_fitdistance():
    """Run ``iam_upgraded`` on random MUTAG graphs and report the results.

    Loads the MUTAG dataset, keeps the graphs whose label is ``1``, and for
    every size in ``nb_median_range`` draws that many graphs (fixed seed),
    runs ``iam_upgraded`` on them, computes the kernel-space distance between
    each generated graph and the uniform combination of the drawn graphs,
    draws the first generated graph and prints timings, distances and sums of
    distances.  Nothing is returned.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {}
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    gkernel = 'marginalizedkernel'
    node_label = 'atom'
    edge_label = 'bond_type'

    # Edge edit costs and stopping parameters handed to iam_upgraded.  Only
    # the edge costs are passed explicitly below.
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False
    # parameters for the GED computation used inside IAM.
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    edit_cost_constant = []
    ged_stabilizer = 'min'
    ged_repeat = 50
    params_ged = {
        'lib': 'gedlibpy',
        'cost': ged_cost,
        'method': ged_method,
        'edit_cost_constant': edit_cost_constant,
        'stabilizer': ged_stabilizer,
        'repeat': ged_repeat
    }

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [10]

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)  # fixed seed so the same graphs are drawn each run
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn_median]

        alpha_range = [1 / nb_median] * nb_median  # uniform weights
        time0 = time.time()
        G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
            = iam_upgraded(Gn_median, Gn_candidate,
            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
            epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
            params_ged=params_ged)

        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)

        # compute distance between \psi and the new generated graphs.
        knew = compute_kernel(G_gen_median_list + Gn_median, gkernel,
                              node_label, edge_label, False)
        # In ``knew`` the generated graphs come first, followed by the
        # len(Gn_median) median graphs; the index range must therefore hold
        # exactly len(Gn_median) entries (one per weight in alpha_range).
        nb_gen = len(G_gen_median_list)
        idx_median = range(nb_gen, nb_gen + len(Gn_median))
        dhat_new_list = [
            dis_gstar(idx, idx_median, alpha_range, knew, withterm3=False)
            for idx in range(nb_gen)
        ]

        print('\nsmallest distance in kernel space: ', dhat_new_list[0])
        dis_ks_min_list.append(dhat_new_list[0])
        g_best.append(G_gen_median_list[0])

        # show the best graph.
        print('one of the possible corresponding pre-images is')
        nx.draw(G_gen_median_list[0],
                labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
                with_labels=True)
        plt.show()
        plt.clf()

        sod_gs_list.append(sod_gen_median)
        print('\nsmallest sod in graph space: ', sod_gen_median)
        print('\nsmallest sod of set median in graph space: ', sod_set_median)

    print('\nsods in graph space: ', sod_gs_list)
    print(
        '\nsmallest distance in kernel space for each set of median graphs: ',
        dis_ks_min_list)
    print('\ntimes:', time_list)
Exemple #8
0
def test_iam_letter_h():
    """Run ``iam_upgraded`` repeatedly on every class of Letter-high.

    For each label found in the dataset, 50 repetitions are performed; each
    one draws 50 graphs of that class with ``random.sample`` (so a class
    needs at least 50 graphs), runs ``iam_upgraded`` on them, saves a drawing
    of the first generated graph and computes its kernel-space distance to
    the uniform combination of the drawn graphs.  Per-class and overall means
    of the timings, distances and sums of distances are printed.  Nothing is
    returned.
    """
    from median import draw_Letter_graph
    ds = {
        'name': 'Letter-high',
        'dataset': '../datasets/Letter-high/Letter-high_A.txt',
        'extra_params': {}
    }  # node nsymb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    gkernel = 'structuralspkernel'

    # Edge edit costs and stopping parameters handed to iam_upgraded (values
    # from the IAM paper).  Only the edge costs are passed explicitly below.
    c_ei = 3
    c_er = 3
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False
    # parameters for the GED computation used inside IAM.
    ged_cost = 'LETTER'
    ged_method = 'IPFP'
    edit_cost_constant = []
    ged_stabilizer = 'min'
    ged_repeat = 50
    params_ged = {
        'lib': 'gedlibpy',
        'cost': ged_cost,
        'method': ged_method,
        'edit_cost_constant': edit_cost_constant,
        'stabilizer': ged_stabilizer,
        'repeat': ged_repeat
    }

    nb_repeats = 50  # repetitions per letter
    nb_median = 50  # graphs drawn per repetition

    # classify graphs according to letters.
    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    g_best = []
    sod_set_median_list = []
    idx_dict = get_same_item_indices(y_all)
    for letter in idx_dict:
        print('\n-------------------------------------------------------')
        print('letter', letter)
        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]

        # one sub-list of results per letter.
        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        g_best.append([])
        sod_set_median_list.append([])

        for _ in range(nb_repeats):
            idx_rdm = random.sample(range(len(Gn_let)), nb_median)
            print('graphs chosen:', idx_rdm)
            Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
            Gn_candidate = [g.copy() for g in Gn_median]

            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
            time0 = time.time()
            # NOTE(review): three values are unpacked here while
            # test_iam_fitdistance unpacks five from the same function --
            # confirm against iam_upgraded's current return value.
            ghat_new_list, sod_min, sod_set_median = iam_upgraded(
                Gn_median,
                Gn_candidate,
                c_ei=c_ei,
                c_er=c_er,
                c_es=c_es,
                ite_max=ite_max_iam,
                epsilon=epsilon_iam,
                connected=connected_iam,
                removeNodes=removeNodes,
                params_ged=params_ged)
            time_total = time.time() - time0
            print('\ntime: ', time_total)
            time_list[-1].append(time_total)
            g_best[-1].append(ghat_new_list[0])
            sod_set_median_list[-1].append(sod_set_median)
            print('\nsmallest sod of the set median:', sod_set_median)
            sod_gs_list[-1].append(sod_min)
            print('\nsmallest sod in graph space:', sod_min)

            # show the best graph and save it to file.
            print('one of the possible corresponding pre-images is')
            draw_Letter_graph(ghat_new_list[0],
                              savepath='results/iam/paper_compare/')

            # compute distance between \psi and the new generated graphs.
            # NOTE(review): the third positional argument is ``False``; other
            # call sites in this file pass a node label there or use
            # ``verbose=False`` -- confirm against compute_kernel's signature.
            knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
            # In ``knew`` the generated graphs come first, followed by the
            # len(Gn_median) median graphs; the index range must therefore
            # hold exactly len(Gn_median) entries (one per weight).
            nb_gen = len(ghat_new_list)
            idx_median = range(nb_gen, nb_gen + len(Gn_median))
            dhat_new_list = [
                dis_gstar(idx, idx_median, alpha_range, knew, withterm3=False)
                for idx in range(nb_gen)
            ]

            print('\nsmallest distance in kernel space: ', dhat_new_list[0])
            dis_ks_min_list[-1].append(dhat_new_list[0])

        print('\nsods of the set median for this letter:',
              sod_set_median_list[-1])
        print('\nsods in graph space for this letter:', sod_gs_list[-1])
        print('\nsmallest distances in kernel space for this letter:',
              dis_ks_min_list[-1])
        print('\ntimes for this letter:', time_list[-1])

        # collapse this letter's per-repetition results to their mean.
        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
        time_list[-1] = np.mean(time_list[-1])

    print('\nmean sods of the set median for each letter:',
          sod_set_median_list)
    print('\nmean sods in graph space for each letter:', sod_gs_list)
    print('\nmean smallest distances in kernel space for each letter:',
          dis_ks_min_list)
    print('\nmean times for each letter:', time_list)

    print('\nmean sods of the set median of all:',
          np.mean(sod_set_median_list))
    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
    print('\nmean smallest distances in kernel space of all:',
          np.mean(dis_ks_min_list))
    print('\nmean times of all:', np.mean(time_list))
Exemple #9
0
def test_iam_mutag():
    """Run the IAM median-graph experiment on the MUTAG dataset.

    The graphs are grouped by their target value.  For every group the
    experiment is repeated ``nb_repeats`` times: ``nb_medians`` graphs are
    drawn at random, ``iam_upgraded`` is run on them, and the following are
    recorded and printed:

    * the running time of ``iam_upgraded``;
    * the SOD of the set median and of the generated median, as returned by
      ``iam_upgraded``;
    * the value returned by ``dis_gstar`` (kernel-space distance) for the
      first set median and for the first generated median.

    Per-group means, overall means, and a count of how many SOD lists got
    better / worse / stayed equal between their first and last entry are
    printed at the end.  Nothing is returned.

    Raises:
        ValueError: from ``random.sample`` if a group holds fewer than
            ``nb_medians`` graphs.
    """
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {}
    }  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    gkernel = 'untilhpathkernel'
    node_label = 'atom'
    edge_label = 'bond_type'

    # Experiment size: repetitions per class and graphs sampled per repetition.
    nb_repeats = 50
    nb_medians = 10

    # parameters for GED function from the IAM paper.
    # fitted edit costs (the unfitted alternative was 3, 3, 1, 3, 3, 1).
    c_vi = 0.03523843108436513
    c_vr = 0.03347339739350128
    c_vs = 0.06871290673612238
    c_ei = 0.08591999846720685
    c_er = 0.07962086440894103
    c_es = 0.08596855855478233
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False

    # parameters for IAM function
    ged_cost = 'CONSTANT'
    ged_method = 'IPFP'
    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
    ged_stabilizer = 'min'
    ged_repeat = 50
    params_ged = {
        'lib': 'gedlibpy',
        'cost': ged_cost,
        'method': ged_method,
        'edit_cost_constant': edit_cost_constant,
        'stabilizer': ged_stabilizer,
        'repeat': ged_repeat
    }

    # Group graphs by class label; each list below holds one entry per class
    # (a list of per-repeat values that is collapsed to its mean afterwards).
    time_list = []
    dis_ks_min_list = []
    dis_ks_set_median_list = []
    sod_gs_list = []
    g_best = []
    sod_set_median_list = []
    sod_list_list = []  # flat: one SOD history per (class, repeat)
    idx_dict = get_same_item_indices(y_all)
    for y_class in idx_dict:
        print('\n-------------------------------------------------------')
        print('class of y:', y_class)
        Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]

        time_list.append([])
        dis_ks_min_list.append([])
        dis_ks_set_median_list.append([])
        sod_gs_list.append([])
        g_best.append([])
        sod_set_median_list.append([])

        for _ in range(nb_repeats):
            idx_rdm = random.sample(range(len(Gn_class)), nb_medians)
            print('graphs chosen:', idx_rdm)
            Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
            Gn_candidate = [g.copy() for g in Gn_median]

            # Uniform weights over the sampled graphs.
            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
            time0 = time.time()
            G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
            = iam_upgraded(Gn_median,
                Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
                epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
                params_ged=params_ged)
            time_total = time.time() - time0
            print('\ntime: ', time_total)
            time_list[-1].append(time_total)
            g_best[-1].append(G_gen_median_list[0])
            sod_set_median_list[-1].append(sod_set_median)
            print('\nsmallest sod of the set median:', sod_set_median)
            sod_gs_list[-1].append(sod_gen_median)
            print('\nsmallest sod in graph space:', sod_gen_median)
            sod_list_list.append(sod_list)

            # Draw the best graph.  Showing/saving the figure is disabled, so
            # the figure is simply cleared again right after drawing.
            print('one of the possible corresponding pre-images is')
            nx.draw(G_gen_median_list[0],
                    labels=nx.get_node_attributes(G_gen_median_list[0],
                                                  'atom'),
                    with_labels=True)
            plt.clf()

            # compute distance between \psi and the set median graph.
            dhat_new_set_median_list = _kernel_distances_to_medians(
                G_set_median_list, Gn_median, alpha_range, gkernel,
                node_label, edge_label)
            print('\ndistance in kernel space of set median: ',
                  dhat_new_set_median_list[0])
            dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])

            # compute distance between \psi and the new generated graphs.
            dhat_new_list = _kernel_distances_to_medians(
                G_gen_median_list, Gn_median, alpha_range, gkernel,
                node_label, edge_label)
            print('\nsmallest distance in kernel space: ', dhat_new_list[0])
            dis_ks_min_list[-1].append(dhat_new_list[0])

        print('\nsods of the set median for this class:',
              sod_set_median_list[-1])
        print('\nsods in graph space for this class:', sod_gs_list[-1])
        print('\ndistance in kernel space of set median for this class:',
              dis_ks_set_median_list[-1])
        print('\nsmallest distances in kernel space for this class:',
              dis_ks_min_list[-1])
        print('\ntimes for this class:', time_list[-1])

        # Collapse the per-repeat values of this class into their mean.
        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
        dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
        time_list[-1] = np.mean(time_list[-1])

    print()
    print('\nmean sods of the set median for each class:', sod_set_median_list)
    print('\nmean sods in graph space for each class:', sod_gs_list)
    print('\ndistances in kernel space of set median for each class:',
          dis_ks_set_median_list)
    print('\nmean smallest distances in kernel space for each class:',
          dis_ks_min_list)
    print('\nmean times for each class:', time_list)

    print('\nmean sods of the set median of all:',
          np.mean(sod_set_median_list))
    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
    print('\nmean distances in kernel space of set median of all:',
          np.mean(dis_ks_set_median_list))
    print('\nmean smallest distances in kernel space of all:',
          np.mean(dis_ks_min_list))
    print('\nmean times of all:', np.mean(time_list))

    # Compare the first and last entry of every SOD history.
    nb_better_sods = 0
    nb_worse_sods = 0
    nb_same_sods = 0
    for sods in sod_list_list:
        if sods[0] > sods[-1]:
            nb_better_sods += 1
        elif sods[0] < sods[-1]:
            nb_worse_sods += 1
        else:
            nb_same_sods += 1
    nb_sod_lists = len(sod_list_list)
    # Avoid ZeroDivisionError when no SOD list was collected (no classes).
    improved_ratio = nb_better_sods / nb_sod_lists if nb_sod_lists else 0.0
    print('\n In', str(nb_sod_lists), 'sod lists,', str(nb_better_sods),
          'are getting better,', str(nb_worse_sods), 'are getting worse,',
          str(nb_same_sods), 'are not changed; ',
          str(improved_ratio), 'sods are improved.')


def _kernel_distances_to_medians(candidates, Gn_median, alpha_range, gkernel,
                                 node_label, edge_label):
    """Return ``dis_gstar`` for every graph in ``candidates`` w.r.t. ``Gn_median``.

    One kernel matrix is computed over ``candidates + Gn_median``; inside it
    the candidates occupy indices ``[0, len(candidates))`` and the median
    graphs the indices ``[len(candidates), len(candidates) + len(Gn_median))``.
    The upper bound is exclusive: the matrix has no row/column beyond it.

    Args:
        candidates: list of graphs to evaluate.
        Gn_median: list of graphs the candidates are compared against.
        alpha_range: one weight per graph in ``Gn_median``.
        gkernel, node_label, edge_label: forwarded to ``compute_kernel``.

    Returns:
        A list with one ``dis_gstar`` result per candidate, in order.
    """
    kmatrix = compute_kernel(candidates + Gn_median, gkernel, node_label,
                             edge_label, False)
    nb_candidates = len(candidates)
    median_indices = range(nb_candidates, nb_candidates + len(Gn_median))
    # @todo (kept from the original code): term3 could be computed once and
    # reused instead of being recomputed by every dis_gstar call.
    return [
        dis_gstar(idx, median_indices, alpha_range, kmatrix, withterm3=False)
        for idx in range(nb_candidates)
    ]