Example #1
0
def main():
    """Fuse graph and description embeddings and evaluate the alignment.

    Reads the command-line arguments via ``parse()``, loads the alignment
    pairs (``args.ill``), the description embeddings (``args.desc``) and the
    pickled graph embedding matrix (``args.graph``), concatenates the two
    embeddings row by row with weights 0.8 / 0.2 and hands the result to
    ``get_hits``.

    Raises:
        ValueError: if the description-embedding dictionary is empty, because
            the width of the zero fallback vector cannot be determined.
    """
    args = parse()
    ill = loadfile(args.ill, 2)
    bert_dict = load_json_embd(args.desc)
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load graph embeddings produced by a trusted run.
    with open(args.graph, "rb") as f:
        graph_embd = pickle.load(f)
    e_num, _ = graph_embd.shape

    if not bert_dict:
        raise ValueError("description embedding dictionary is empty")
    # Build the zero fallback once, from whichever entry is available, so the
    # function no longer requires entity 0 to have a description embedding.
    missing = np.zeros_like(next(iter(bert_dict.values())))
    bert_embd = np.array([
        bert_dict[i] if i in bert_dict else missing
        for i in range(e_num)
    ])

    # Weighted concatenation along the feature axis.
    embd = np.concatenate([0.8 * graph_embd, 0.2 * bert_embd], axis=1)
    get_hits(embd, ill)
Example #2
0
def training(output_layer, loss, learning_rate, epochs, ILL, e, k, test):
    """Minimise ``loss`` with Adam and return the final output and loss log.

    Args:
        output_layer: tensor evaluated to obtain the current embeddings.
        loss: tensor minimised by the optimiser.
        learning_rate: learning rate passed to ``tf.train.AdamOptimizer``.
        epochs: number of optimisation steps to run.
        ILL: sequence of (left, right) alignment pairs used for training.
        e: unused in this function; kept for interface compatibility.
        k: number of negative samples per alignment pair.
        test: alignment pairs passed to ``get_hits`` every 10 epochs.

    Returns:
        ``(outvec, J)`` where ``outvec`` is ``output_layer`` evaluated after
        the last epoch and ``J`` is the list of loss values recorded every
        10 epochs.
    """
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    print('initializing...')
    init = tf.global_variables_initializer()
    sess = tf.Session()
    # try/finally guarantees the session is released even when data
    # preparation, a training step or the evaluation raises.
    try:
        sess.run(init)
        print('running...')
        J = []
        t = len(ILL)
        ILL = np.array(ILL)
        # Repeat every left / right id k times so each positive pair lines up
        # with its k negative samples.
        L = np.ones((t, k)) * (ILL[:, 0].reshape((t, 1)))
        neg_left = L.reshape((t * k, ))
        L = np.ones((t, k)) * (ILL[:, 1].reshape((t, 1)))
        neg2_right = L.reshape((t * k, ))
        for i in range(epochs):
            if i % 10 == 0:
                # Refresh the negative samples from the current embeddings.
                out = sess.run(output_layer)
                neg2_left = get_neg(ILL[:, 1], out, k)
                neg_right = get_neg(ILL[:, 0], out, k)
                feeddict = {
                    "neg_left:0": neg_left,
                    "neg_right:0": neg_right,
                    "neg2_left:0": neg2_left,
                    "neg2_right:0": neg2_right
                }

            _, th = sess.run([train_step, loss], feed_dict=feeddict)
            if i % 10 == 0:
                # Re-evaluate after the update so the logged loss and the
                # evaluated embeddings belong to the same parameters.
                th, outvec = sess.run([loss, output_layer],
                                      feed_dict=feeddict)
                J.append(th)
                get_hits(outvec, test)

            print('%d/%d' % (i + 1, epochs), 'epochs...', th)
        outvec = sess.run(output_layer)
    finally:
        sess.close()
    return outvec, J
Example #3
0
def training(output_h, loss_pre, loss_all, learning_rate, epochs, pre_epochs,
             ILL, e, k, save_suffix, dimension, train_batchnum, test, M0, e1,
             e2, KG, rel_type, output_r, l1, r_num, ILL_r):
    """Pre-train on ``loss_pre``, continue on ``loss_all``, then evaluate.

    Epochs below ``pre_epochs`` minimise ``loss_pre``; later epochs minimise
    ``loss_all`` and additionally feed a slice of the KG triples. A checkpoint
    is written to ``RNM_model/save_<save_suffix>.ckpt`` after the last
    pre-training epoch; if that checkpoint already exists it is restored and
    training resumes at ``pre_epochs``. After the final epoch the embeddings
    are evaluated with ``get_hits`` (one initial pass plus three refinement
    passes) and ``get_rel_hits``.

    Args:
        output_h: tensor evaluated to obtain the entity embeddings.
        loss_pre: loss minimised during pre-training.
        loss_all: loss minimised after pre-training.
        learning_rate: learning rate for both Adam optimisers.
        epochs: total number of epochs.
        pre_epochs: number of pre-training epochs; negatives are re-sampled
            whenever the epoch index is a multiple of it.
        ILL: sequence of (left, right) training alignment pairs.
        k: number of negative samples per alignment pair.
        save_suffix: suffix used in the checkpoint file name.
        train_batchnum: number of mini-batches per epoch.
        test: test alignment pairs forwarded to the evaluation helpers.
        KG: iterable of triples; the first three fields of each are fed as
            head / rel / tail.
        rel_type, output_r, l1, ILL_r: forwarded to the evaluation helpers
            (``output_r`` is evaluated alongside ``output_h``).
        e, dimension, M0, e1, e2, r_num: unused in this function; kept for
            interface compatibility.

    Returns:
        None.
    """
    from include.Test import get_hits, get_rel_hits
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_pre)
    train_all = tf.train.AdamOptimizer(learning_rate).minimize(loss_all)

    print('initializing...')
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess = tf.Session()
    # try/finally guarantees the session is released even when training or
    # evaluation raises.
    try:
        sess.run(init)

        print('running...')
        ILL = np.array(ILL)
        t = len(ILL)
        # Repeat every left / right id k times so each positive pair lines up
        # with its k negative samples.
        L = np.ones((t, k)) * (ILL[:, 0].reshape((t, 1)))
        neg_left = L.reshape((t * k, ))
        L = np.ones((t, k)) * (ILL[:, 1].reshape((t, 1)))
        neg2_right = L.reshape((t * k, ))

        kg_tri = np.array([[tri[0], tri[1], tri[2]] for tri in KG])
        tri_num = len(kg_tri)

        dn = 'RNM'
        model_dir = dn + '_' + "model/"
        ckpt_path = model_dir + "save_" + save_suffix + ".ckpt"

        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

        # Resume after pre-training when a checkpoint is already present.
        if os.path.exists(ckpt_path + ".meta"):
            saver.restore(sess, ckpt_path)
            start_epoch = pre_epochs
        else:
            start_epoch = 0

        for i in range(start_epoch, epochs):

            if i % pre_epochs == 0:
                # Refresh the negative samples from the current embeddings.
                out = sess.run(output_h)
                print('data preparation')
                neg2_left = get_neg(ILL[:, 1], out, k, train_batchnum)
                neg_right = get_neg(ILL[:, 0], out, k, train_batchnum)

            for j in range(train_batchnum):
                # The last batch absorbs the rounding remainder.
                beg = int(t / train_batchnum * j)
                if j == train_batchnum - 1:
                    end = t
                else:
                    end = int(t / train_batchnum * (j + 1))

                feeddict = {
                    "ILL:0": ILL[beg:end],
                    "neg_left:0":
                        neg_left.reshape((t, k))[beg:end].reshape((-1, )),
                    "neg_right:0":
                        neg_right.reshape((t, k))[beg:end].reshape((-1, )),
                    "neg2_left:0":
                        neg2_left.reshape((t, k))[beg:end].reshape((-1, )),
                    "neg2_right:0":
                        neg2_right.reshape((t, k))[beg:end].reshape((-1, )),
                }

                if i < pre_epochs:
                    _ = sess.run([train_step], feed_dict=feeddict)
                else:
                    # Joint phase: also feed the matching slice of triples.
                    tri_beg = int(tri_num / train_batchnum * j)
                    if j == train_batchnum - 1:
                        tri_end = tri_num
                    else:
                        tri_end = int(tri_num / train_batchnum * (j + 1))

                    feeddict["head:0"] = kg_tri[tri_beg:tri_end, 0]
                    feeddict["rel:0"] = kg_tri[tri_beg:tri_end, 1]
                    feeddict["tail:0"] = kg_tri[tri_beg:tri_end, 2]

                    _ = sess.run([train_all], feed_dict=feeddict)

            if (i + 1) % 10 == 0 or i == 0:
                print('%d/%d' % (i + 1, epochs), 'epochs...')

            if i == pre_epochs - 1:
                save_path = saver.save(sess, ckpt_path)
                print("Save to path: ", save_path)

            if i == epochs - 1:
                print('Testing')
                iters = 3
                outvec, outvec_r = sess.run([output_h, output_r])
                print('iter: 1')

                sim_e, sim_r = get_hits(outvec, outvec_r, l1, KG, ILL,
                                        rel_type, test, None, None)
                # A dedicated loop name keeps ``t`` (the number of training
                # pairs) from being overwritten.
                for it in range(iters):
                    print('iter: ' + str(it + 2))
                    sim_e, sim_r = get_hits(outvec, outvec_r, l1, KG, ILL,
                                            rel_type, test, sim_e, sim_r)

                get_rel_hits(outvec, outvec_r, l1, KG, ILL, rel_type, test,
                             ILL_r)
    finally:
        sess.close()

    return
Example #4
0
warnings.filterwarnings("ignore")
'''
Follow the code style of GCN-Align:
https://github.com/1049451037/GCN-Align
'''

# Seed both the NumPy and the TensorFlow random generators.
seed = 12306
np.random.seed(seed)
tf.set_random_seed(seed)

if __name__ == '__main__':
    # Number of distinct entities across both entity files.
    entities_1 = set(loadfile(Config.e1, 1))
    entities_2 = set(loadfile(Config.e2, 1))
    e = len(entities_1 | entities_2)

    # Shuffle the alignment pairs, then split: the first Config.seed tenths
    # become the training set, the remainder the test set.
    ILL = loadfile(Config.ill, 2)
    illL = len(ILL)
    np.random.shuffle(ILL)
    split = illL // 10 * Config.seed
    train = np.array(ILL[:split])
    test = ILL[split:]

    KG1 = loadfile(Config.kg1, 3)
    KG2 = loadfile(Config.kg2, 3)
    triples = KG1 + KG2

    output_layer, loss = build(
        Config.dim, Config.act_func, Config.alpha, Config.beta, Config.gamma,
        Config.k, Config.language[0:2], e, train, triples)
    vec, J = training(
        output_layer, loss, 0.001, Config.epochs, train, e, Config.k, test)

    print('loss:', J)
    print('Result:')
    get_hits(vec, test)
Example #5
0
def training(output_h, output_h_match, loss_all, sample_w, loss_w, learning_rate,
             epochs, pre_epochs, ILL, e, k, sampled_nbr_num, save_suffix, dimension, dimension_g, c,
             train_batchnum, test_batchnum,
             test, M0, e1, e2, nbr_all, mask_all):
    """Train the matching model with periodic re-sampling and evaluation.

    Every 50 epochs (and on the first epoch actually executed) negatives,
    candidates and neighbourhoods are re-sampled from the current embeddings
    and the base feed dict is rebuilt. Each epoch then runs ``train_step``
    over ``train_batchnum`` mini-batches. A checkpoint is written to
    ``model/save_<save_suffix>.ckpt`` after epoch ``pre_epochs - 1``; if it
    already exists it is restored and training resumes at ``pre_epochs``.
    Every 10 epochs the embeddings are evaluated, and from ``pre_epochs`` on
    ``sample_w`` is trained at the end of every 50-epoch window.

    Args:
        output_h: tensor evaluated to obtain the entity embeddings.
        output_h_match: tensor evaluated per test batch for ``get_hits_new``.
        loss_all: loss minimised by the main optimiser.
        sample_w: variable trained by the second optimiser on ``loss_w``.
        loss_w: loss used to train ``sample_w``.
        learning_rate: learning rate for both Adam optimisers.
        epochs: total number of epochs.
        pre_epochs: epoch at which ``alpha`` may switch from 0 to 1 and at
            which training resumes after a checkpoint restore.
        ILL: sequence of (left, right) training alignment pairs.
        e, e1, e2: forwarded to ``get_nbr`` / ``mask_candidate`` /
            ``sample_nbr``.
        k: number of negative samples per alignment pair.
        sampled_nbr_num: forwarded to the neighbourhood samplers.
        save_suffix: suffix used in the checkpoint file name.
        dimension, dimension_g: their sum is the last axis used when
            reshaping ``output_h_match``; ``dimension`` also sizes the
            initial identity ``sample_w`` value.
        c: number of candidates sampled per pair.
        train_batchnum, test_batchnum: number of train / test mini-batches.
        test: test alignment pairs.
        M0, nbr_all, mask_all: forwarded to the neighbourhood samplers.

    Returns:
        ``(outvec, J)``: ``outvec`` is the value of ``output_h`` from the
        most recent evaluation epoch, or ``None`` if no evaluation epoch ran;
        ``J`` is never appended to in this function and is returned empty.
    """
    from include.Test import get_hits, get_hits_new
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_all)
    train_step_w = tf.train.AdamOptimizer(
        learning_rate).minimize(loss_w, var_list=[sample_w])
    print('initializing...')
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess = tf.Session()
    # Defined up front so the return statement cannot hit an unbound name
    # when the epoch loop performs no evaluation.
    outvec = None
    J = []
    # try/finally guarantees the session is released even when training or
    # evaluation raises.
    try:
        sess.run(init)
        print('running...')
        ILL = np.array(ILL)
        t = len(ILL)
        # Repeat every left / right id k times so each positive pair lines up
        # with its k negative samples.
        L = np.ones((t, k)) * (ILL[:, 0].reshape((t, 1)))
        neg_left = L.reshape((t * k,))
        L = np.ones((t, k)) * (ILL[:, 1].reshape((t, 1)))
        neg2_right = L.reshape((t * k,))

        nbr_sampled, mask_sampled = get_nbr(M0, e, sampled_nbr_num)
        mask_e1, mask_e2 = mask_candidate(e, e1, e2)
        sample_w_vec = np.identity(dimension)
        test_can_num = 50

        ckpt_path = "model/save_" + save_suffix + ".ckpt"
        if not os.path.exists("model/"):
            os.makedirs("model/")

        # Resume after pre-training when a checkpoint is already present.
        if os.path.exists(ckpt_path + ".meta"):
            saver.restore(sess, ckpt_path)
            start_epoch = pre_epochs
        else:
            start_epoch = 0

        for i in range(start_epoch, epochs):
            # Also refresh on the first executed epoch: after a restore,
            # start_epoch need not be a multiple of 50 and the feed dict
            # would otherwise be undefined.
            if i % 50 == 0 or i == start_epoch:
                out = sess.run(output_h)
                print('get negative pairs')
                neg2_left = get_neg(ILL[:, 1], out, k, train_batchnum)
                neg_right = get_neg(ILL[:, 0], out, k, train_batchnum)
                print('sample candidates')
                c_left = sample_candidate(
                    ILL[:, 1], ILL[:, 0], out, c, mask_e2, train_batchnum)
                c_right = sample_candidate(
                    ILL[:, 0], ILL[:, 1], out, c, mask_e1, train_batchnum)
                candidate = np.reshape(np.concatenate(
                    (c_right, c_left), axis=0), (2, len(ILL), c))
                print('sample neighborhood')
                nbr_sampled, mask_sampled = sample_nbr(
                    out, nbr_all, mask_all, e, sampled_nbr_num, sample_w_vec,
                    test_batchnum)
                feeddict = {"ILL:0": ILL,
                            "candidate:0": candidate.reshape((-1,)),
                            "neg_left:0": neg_left,
                            "neg_right:0": neg_right,
                            "neg2_left:0": neg2_left,
                            "neg2_right:0": neg2_right,
                            "nbr_sampled:0": nbr_sampled,
                            "mask_sampled:0": mask_sampled,
                            "c:0": c}

                # alpha is only updated on a refresh.
                feeddict["alpha:0"] = 0 if i < pre_epochs else 1

            for j in range(train_batchnum):
                # The last batch absorbs the rounding remainder.
                beg = int(t / train_batchnum * j)
                if j == train_batchnum - 1:
                    end = t
                else:
                    end = int(t / train_batchnum * (j + 1))
                feeddict["ILL:0"] = ILL[beg:end]
                feeddict["candidate:0"] = candidate[:, beg:end].reshape((-1,))
                feeddict["neg_left:0"] = neg_left.reshape(
                    (t, k))[beg:end].reshape((-1,))
                feeddict["neg_right:0"] = neg_right.reshape(
                    (t, k))[beg:end].reshape((-1,))
                feeddict["neg2_left:0"] = neg2_left.reshape(
                    (t, k))[beg:end].reshape((-1,))
                feeddict["neg2_right:0"] = neg2_right.reshape(
                    (t, k))[beg:end].reshape((-1,))
                _ = sess.run([train_step], feed_dict=feeddict)

            if i == pre_epochs - 1:
                save_path = saver.save(sess, ckpt_path)
                print("Save to path: ", save_path)

            if i % 10 == 0:
                print('%d/%d' % (i + 1, epochs), 'epochs...')
                # NOTE(review): feeddict still holds the slices of the last
                # mini-batch here - confirm output_h does not depend on them.
                outvec = sess.run(output_h, feed_dict=feeddict)
                test_can = get_hits(outvec, test, test_can_num)
                if i >= pre_epochs:
                    for j in range(test_batchnum):
                        beg = int(len(test) / test_batchnum * j)
                        if j == test_batchnum - 1:
                            end = len(test)
                        else:
                            end = int(len(test) / test_batchnum * (j + 1))
                        feeddict_test = {
                            "ILL:0": test[beg:end],
                            "candidate:0":
                                test_can[:, beg:end].reshape((-1,)),
                            "nbr_sampled:0": nbr_sampled,
                            "mask_sampled:0": mask_sampled,
                            "c:0": test_can_num}
                        outvec_h_match = sess.run(
                            output_h_match, feed_dict=feeddict_test)
                        batch_match = outvec_h_match.reshape(
                            (2, -1, dimension + dimension_g))
                        if j == 0:
                            outvec_h_match_all = batch_match
                        else:
                            outvec_h_match_all = np.concatenate(
                                [outvec_h_match_all, batch_match], axis=1)
                    get_hits_new(outvec_h_match_all, test_can, test,
                                 test_can_num)

            if i >= pre_epochs and i % 50 == 49:
                print('train sample w')
                for _round in range(10):
                    select_train = np.random.choice(len(ILL), 10)
                    feeddict["select_train:0"] = select_train
                    for _step in range(5):
                        _, thw = sess.run([train_step_w, loss_w],
                                          feed_dict=feeddict)
                    print(thw)
                # The learned weights drive the next neighbourhood sampling.
                sample_w_vec = sess.run(sample_w, feed_dict=feeddict)
    finally:
        sess.close()
    return outvec, J
Example #6
0
 # NOTE(review): this is the tail of a definition whose header (and the
 # assignment of ``e``) lies above the visible source.
 print(e)
 # Load the alignment pairs, shuffle them, and split: the first
 # ``illL // 10 * Config.seed`` pairs are used for training, the rest for
 # testing.
 ILL = loadfile(Config.ill, 2)
 illL = len(ILL)
 np.random.shuffle(ILL)
 train = np.array(ILL[:illL // 10 * Config.seed])
 test = ILL[illL // 10 * Config.seed:]
 KG1 = loadfile(Config.kg1, 3)
 KG2 = loadfile(Config.kg2, 3)
 # build SE
 output_layer, loss = build_SE(Config.se_dim, Config.act_func, Config.gamma,
                               Config.k, e, train, KG1 + KG2)
 # The third argument (25) is presumably the learning rate - confirm against
 # the ``training`` definition this fragment uses.
 se_vec, J = training(output_layer, loss, 25, Config.epochs_se, train, e,
                      Config.k)
 print('loss:', J)
 print('Result of SE:')
 get_hits(se_vec, test)
 # Build and train the second model from the loaded attributes; both models
 # are fed the same training pairs and the concatenated triples.
 ent2id = get_ent2id([Config.e1, Config.e2])
 attr = loadattr([Config.a1, Config.a2], e, ent2id)
 output_layer, loss = build_AE(attr, Config.ae_dim, Config.act_func,
                               Config.gamma, Config.k, e, train, KG1 + KG2)
 ae_vec, J = training(output_layer, loss, 5, Config.epochs_ae, train, e,
                      Config.k)
 print('loss:', J)
 # print('Result of AE:')
 # get_hits(ae_vec, test)
 # np.save('se_vec.npy', se_vec) # save embeddings
 # np.save('ae_vec.npy', ae_vec)
 # print(se_vec.shape, ae_vec.shape)
 # print('embeddings are saved.')
 # Evaluate both embeddings together; Config.beta is passed to the combiner.
 print('Result of SE+AE:')
 get_combine_hits(se_vec, ae_vec, Config.beta, test)
0
    # NOTE(review): this is the tail of a definition whose header (and the
    # assignment of ``e``) lies above the visible source.
    print(e)
    # NOTE(review): ILL and illL are not used in the visible lines.
    ILL = loadfile(Config.ill, 2)
    illL = len(ILL)
    # The train and dev pairs are shuffled separately and then merged into a
    # single training array; the test pairs come from their own file.
    train = loadfile(Config.tr, 2)
    dev = loadfile(Config.dev, 2)
    np.random.shuffle(train)
    np.random.shuffle(dev)
    train = np.array(train + dev)
    test = loadfile(Config.te, 2)
    KG1 = loadfile(Config.kg1, 3)
    KG2 = loadfile(Config.kg2, 3)
    ent2id = get_ent2id([Config.e1, Config.e2])  # attr
    attr = load_attr([Config.a1, Config.a2], e, ent2id,
                     Config.attr_num)  # attr
    rel = load_relation(e, KG1 + KG2, Config.rel_num)

    # Both builders receive identical arguments; HYBRID (defined outside the
    # visible source) only selects which one is called.
    if HYBRID:
        print("running HMAN...")
        output_layer, loss = build_HMAN(Config.se_dim, Config.act_func, Config.gamma, Config.k, \
                                        e, train, KG1 + KG2, attr, Config.ae_dim, rel, Config.rel_dim)
    else:
        print("running MAN...")
        output_layer, loss = build_MAN(Config.se_dim, Config.act_func, Config.gamma, Config.k, \
                                        e, train, KG1 + KG2, attr, Config.ae_dim, rel, Config.rel_dim)

    # The third argument (25) is presumably the learning rate - confirm
    # against the ``training`` definition this fragment uses.
    graph_embd, J = training(output_layer, loss, 25, Config.epochs, train, e,
                             Config.k, test)
    get_hits(graph_embd, test)
    # Persist the trained embeddings as a pickle under Config.ckpt, named
    # after Config.language.
    with open(Config.ckpt + "/%s_graph_embd.pkl" % Config.language, "wb") as f:
        pickle.dump(graph_embd, f)