Ejemplo n.º 1
0
def test_who_is_the_closest_in_kernel_space(Gn):
    """Print which graphs are closest to two reference graphs in kernel space.

    A candidate "median" graph is built by copying ``Gn[6]`` and removing its
    node ``0``. The candidate is drawn and printed, then prepended to the
    graph list. A Gram matrix of the extended list is computed with the
    ``'untilhpathkernel'`` kernel and converted to a distance matrix.

    For each of the two reference graphs (``Gn[0]`` and ``Gn[6]`` of the
    input) the sorted distance row and its ``argsort`` are printed, followed
    by the sorted mean distance to both references and its ``argsort``.

    Args:
        Gn: List of graphs with at least seven entries. The name is rebound
            locally to a new, longer list; the list object passed in is not
            extended by this function. Presumably NetworkX graphs, since
            ``copy``, ``remove_node`` and ``nx.draw_networkx`` are used
            (TODO confirm).

    Returns:
        None. All results are printed.
    """
    idx_gi = [0, 6]
    # create the "median" graph: the second reference graph without node 0.
    gnew = Gn[idx_gi[1]].copy()
    gnew.remove_node(0)
    nx.draw_networkx(gnew)
    plt.show()
    print(gnew.nodes(data=True))

    # The candidate occupies position 0, so every original graph moves up by
    # one position in the extended list (and in the matrices built from it).
    Gn = [gnew] + Gn
    pos_g1 = idx_gi[0] + 1
    pos_g2 = idx_gi[1] + 1

    # compute gram matrix
    Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
    # the distance matrix
    dmatrix = gram2distances(Kmatrix)
    for pos in (pos_g1, pos_g2):
        print(np.sort(dmatrix[pos]))
        print(np.argsort(dmatrix[pos]))

    # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2
    dis_median = [(dmatrix[i, pos_g1] + dmatrix[i, pos_g2]) / 2
                  for i in range(len(Gn))]
    print(np.sort(dis_median))
    print(np.argsort(dis_median))
Ejemplo n.º 2
0
def test_the_simple_two(Gn, gkernel):
    """Run ``gk_iam_nearest_multi`` for two fixed graphs and show the result.

    The graphs at positions 0 and 6 of ``Gn`` are selected. A kernel matrix is
    computed over copies of every graph in ``Gn`` followed by copies of the
    two selected graphs, so the selected copies occupy the last two
    rows/columns. For each weight ``alpha`` (a single value, 0.5),
    ``gk_iam_nearest_multi`` is called with weights ``[alpha, 1 - alpha]``.
    The returned distance is printed and every returned graph is drawn and
    printed.

    Args:
        Gn: Sequence of graphs with at least seven entries. Each graph must
            provide ``copy()``, ``nodes(data=True)`` and ``edges(data=True)``.
        gkernel: Kernel identifier forwarded unchanged to ``compute_kernel``
            and ``gk_iam_nearest_multi``.

    Returns:
        None. Results are printed and plotted.
    """
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    r_max = 10  # recursions
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 2  # k nearest neighbors

    # Seed NumPy's global RNG. Nothing in this function draws random numbers
    # itself (the two graphs are fixed below); the seed is kept in case the
    # routines called here rely on it -- TODO confirm.
    np.random.seed(1)
    idx_gi = [0, 6]
    g1 = Gn[idx_gi[0]]
    g2 = Gn[idx_gi[1]]

    # Kernel matrix over copies of all graphs plus copies of g1 and g2, which
    # therefore sit at positions len(Gn) and len(Gn) + 1.
    Gn_mix = [g.copy() for g in Gn]
    Gn_mix.append(g1.copy())
    Gn_mix.append(g2.copy())
    km = compute_kernel(Gn_mix, gkernel, True)

    g_best = []
    dis_best = []
    # for each alpha
    for alpha in alpha_range:
        print('alpha =', alpha)
        dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2],
                                               [alpha, 1 - alpha],
                                               range(len(Gn),
                                                     len(Gn) + 2), km, k,
                                               r_max, gkernel)
        dis_best.append(dhat)
        g_best.append(ghat_list)

    # One (distance, graph list) pair was stored per alpha, in order.
    for alpha, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
        print('when alpha is', alpha, 'the shortest distance is', dhat)
        print('the corresponding pre-images are')
        for g in ghat_list:
            nx.draw_networkx(g)
            plt.show()
            print(g.nodes(data=True))
            print(g.edges(data=True))
Ejemplo n.º 3
0
def test_remove_bests(Gn, gkernel):
    """Run ``gk_iam_nearest_multi`` with the two target graphs left out.

    The graphs at positions 0 and 6 of ``Gn`` are selected as targets and
    excluded from the candidate pool. A kernel matrix is computed over copies
    of the remaining graphs followed by copies of the two targets, so the
    target copies occupy the last two rows/columns. For each weight ``alpha``
    (a single value, 0.5), ``gk_iam_nearest_multi`` is called with weights
    ``[alpha, 1 - alpha]``. The returned distance is printed and every
    returned graph is passed to ``draw_Letter_graph`` and printed.

    Args:
        Gn: Sequence of graphs with at least seven entries. Unlike the
            previous implementation, which ``del``-ed the two targets from
            this list, the caller's list is left untouched; repeated calls
            therefore operate on the same pool.
        gkernel: Kernel identifier forwarded unchanged to ``compute_kernel``
            and ``gk_iam_nearest_multi``.

    Returns:
        None. Results are printed and drawn.
    """
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    r_max = 10  # recursions
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 20  # k nearest neighbors

    # Seed NumPy's global RNG. Nothing in this function draws random numbers
    # itself (the two graphs are fixed below); the seed is kept in case the
    # routines called here rely on it -- TODO confirm.
    np.random.seed(1)
    idx_gi = [0, 6]
    g1 = Gn[idx_gi[0]]
    g2 = Gn[idx_gi[1]]

    # Remove the two selected graphs from the pool. A new list is built
    # instead of deleting from Gn in place, which avoids both the side effect
    # on the caller and the index shift after the first deletion.
    excluded = set(idx_gi)
    Gn_rest = [g for i, g in enumerate(Gn) if i not in excluded]

    # Kernel matrix over copies of the pool plus copies of g1 and g2, which
    # therefore sit at positions len(Gn_rest) and len(Gn_rest) + 1.
    Gn_mix = [g.copy() for g in Gn_rest]
    Gn_mix.append(g1.copy())
    Gn_mix.append(g2.copy())
    km = compute_kernel(Gn_mix, gkernel, True)

    g_best = []
    dis_best = []
    # for each alpha
    for alpha in alpha_range:
        print('alpha =', alpha)
        dhat, ghat_list = gk_iam_nearest_multi(Gn_rest, [g1, g2],
                                               [alpha, 1 - alpha],
                                               range(len(Gn_rest),
                                                     len(Gn_rest) + 2), km, k,
                                               r_max, gkernel)
        dis_best.append(dhat)
        g_best.append(ghat_list)

    # One (distance, graph list) pair was stored per alpha, in order.
    for alpha, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
        print('when alpha is', alpha, 'the shortest distance is', dhat)
        print('the corresponding pre-images are')
        for g in ghat_list:
            draw_Letter_graph(g)
            print(g.nodes(data=True))
            print(g.edges(data=True))
Ejemplo n.º 4
0
def test_iam_letter_h():
    """Run the IAM routine per class on Letter-high and report kernel-space SODs.

    The Letter-high dataset is loaded and its graphs are grouped by the
    indices returned from ``get_same_item_indices(y_all)``. For every group:

    * the IAM routine is called with copies of the group's graphs as both
      positional arguments and edit costs ``c_ei = c_er = c_es = 1.7``; the
      elapsed wall-clock time is recorded;
    * every returned graph is passed to ``draw_Letter_graph`` with
      ``savepath='results/iam/'`` and printed;
    * a ``'structuralspkernel'`` kernel matrix is computed over the returned
      graphs followed by the group's graphs, and ``dis_gstar`` is evaluated
      for each returned graph against the group with uniform weights
      ``1 / len(group)``.

    Finally the per-group values, their minima and the timings are printed.

    Returns:
        None. Results are printed and drawn.
    """
    from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
    from gk_iam import dis_gstar, compute_kernel

    lmbda = 0.03  # termination probability (not referenced below)
    gkernel = 'structuralspkernel'

    ds = {
        'name': 'Letter-high',
        'dataset': '../datasets/Letter-high/Letter-high_A.txt',
        'extra_params': {}
    }  # node nsymb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

    # Group graph indices by class label (one group per letter).
    idx_dict = get_same_item_indices(y_all)
    time_list, sod_list, sod_min_list = [], [], []
    for letter in idx_dict:
        letter_graphs = [Gn[i].copy() for i in idx_dict[letter]]

        # A single weight value: the uniform weight over this group.
        uniform_w = 1 / len(letter_graphs)
        alpha_range = np.linspace(uniform_w, uniform_w, 1)

        # Run IAM once per alpha and time the whole sweep.
        g_best = []
        dis_best = []
        time0 = time.time()
        for alpha in alpha_range:
            print('alpha =', alpha)
            ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
                letter_graphs, letter_graphs, c_ei=1.7, c_er=1.7, c_es=1.7)
            dis_best.append(dhat)
            g_best.append(ghat_list)
        time_list.append(time.time() - time0)

        # Show the best graphs and hand them to the drawing helper.
        for item, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
            print('when alpha is', item, 'the shortest distance is',
                  dhat)
            print('the corresponding pre-images are')
            for g in ghat_list:
                draw_Letter_graph(g, savepath='results/iam/')
                print(g.nodes(data=True))
                print(g.edges(data=True))

        # Corresponding SOD in kernel space (alpha range not considered:
        # only the graphs found for the first alpha are evaluated).
        candidates = g_best[0]
        Gn_mix = candidates + letter_graphs
        km = compute_kernel(Gn_mix, gkernel, True)
        sod_tmp = [
            dis_gstar(ig,
                      range(len(candidates), len(Gn_mix)),
                      [alpha_range[0]] * len(letter_graphs),
                      km,
                      withterm3=False)
            for ig, _ in tqdm(enumerate(candidates),
                              desc='computing kernel sod',
                              file=sys.stdout)
        ]
        sod_list.append(sod_tmp)
        sod_min_list.append(np.min(sod_tmp))

    print('\nsods in kernel space: ', sod_list)
    print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
    print('\ntimes:', time_list)
Ejemplo n.º 5
0
def test_gkiam_letter_h():
    """Run ``gk_iam_nearest_multi`` per class on Letter-high and report SODs.

    The Letter-high dataset is loaded and its graphs are grouped by the
    indices returned from ``get_same_item_indices(y_all)``. For every group:

    * a ``'structuralspkernel'`` kernel matrix is computed over copies of the
      group's graphs followed by a second set of copies;
    * ``gk_iam_nearest_multi`` is called with uniform weights
      ``1 / len(group)``, the index range of the second set of copies,
      ``k = 10``, ``r_max = 3`` and edit costs ``c_ei = c_er = c_es = 1.7``;
      the elapsed wall-clock time (kernel computation included) is recorded;
    * every returned graph is passed to ``draw_Letter_graph`` with
      ``savepath='results/gk_iam/'`` and printed;
    * ``median_distance`` is evaluated for the returned graphs against the
      group.

    Finally the per-group values, their minima and the timings are printed.

    Returns:
        None. Results are printed and drawn.
    """
    from gk_iam import gk_iam_nearest_multi, compute_kernel
    from iam import median_distance

    gkernel = 'structuralspkernel'
    lmbda = 0.03  # termination probability (not referenced below)
    r_max = 3  # recursions
    k = 10  # k nearest neighbors

    ds = {
        'name': 'Letter-high',
        'dataset': '../datasets/Letter-high/Letter-high_A.txt',
        'extra_params': {}
    }  # node nsymb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

    # Group graph indices by class label (one group per letter).
    idx_dict = get_same_item_indices(y_all)
    time_list, sod_list, sod_min_list = [], [], []
    for letter in idx_dict:
        print('\n-------------------------------------------------------\n')
        letter_graphs = [Gn[i].copy() for i in idx_dict[letter]]
        # The group followed by a second set of copies; the copies occupy
        # positions len(letter_graphs) .. len(Gn_mix) - 1.
        Gn_mix = letter_graphs + [g.copy() for g in letter_graphs]

        # A single weight value: the uniform weight over this group.
        uniform_w = 1 / len(letter_graphs)
        alpha_range = np.linspace(uniform_w, uniform_w, 1)

        # Timing covers the kernel matrix as well as the search itself.
        time0 = time.time()
        km = compute_kernel(Gn_mix, gkernel, True)
        g_best = []
        dis_best = []
        for alpha in alpha_range:
            print('alpha =', alpha)
            weights = [alpha] * len(letter_graphs)
            copy_positions = range(len(letter_graphs), len(Gn_mix))
            dhat, ghat_list = gk_iam_nearest_multi(
                letter_graphs, letter_graphs, weights, copy_positions,
                km, k, r_max, gkernel,
                c_ei=1.7, c_er=1.7, c_es=1.7)
            dis_best.append(dhat)
            g_best.append(ghat_list)
        time_list.append(time.time() - time0)

        # Show the best graphs and hand them to the drawing helper.
        for item, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
            print('when alpha is', item, 'the shortest distance is',
                  dhat)
            print('the corresponding pre-images are')
            for g in ghat_list:
                draw_Letter_graph(g, savepath='results/gk_iam/')
                print(g.nodes(data=True))
                print(g.edges(data=True))

        # Corresponding SOD in graph space (alpha range not considered:
        # only the graphs found for the first alpha are evaluated).
        sod_tmp, _ = median_distance(g_best[0], letter_graphs)
        sod_list.append(sod_tmp)
        sod_min_list.append(np.min(sod_tmp))

    print('\nsods in graph space: ', sod_list)
    print('\nsmallest sod in graph space for each letter: ', sod_min_list)
    print('\ntimes:', time_list)