def main():
    """Combine graph and description embeddings and evaluate alignment.

    Reads the alignment pairs from ``args.ill``, the description embeddings
    from ``args.desc`` and the pickled graph embedding matrix from
    ``args.graph``.  A description matrix with one row per graph-embedding
    row is built (all-zero rows for entities that have no description), the
    two matrices are concatenated along axis 1 with weights 0.8 (graph) and
    0.2 (description), and the result is handed to ``get_hits``.

    Raises:
        ValueError: if no description embedding was loaded at all, because
            the width of the zero rows cannot be determined.
    """
    args = parse()
    ill = loadfile(args.ill, 2)
    bert_dict = load_json_embd(args.desc)

    # NOTE(security): pickle.load executes code embedded in the file; only
    # point args.graph at files produced by a trusted training run.
    with open(args.graph, "rb") as f:
        graph_embd = pickle.load(f)
    e_num, _ = graph_embd.shape

    # Pick a template for the zero rows.  Entity 0 is preferred (it is what
    # the rows were historically modelled on), but any available embedding
    # works, so a missing description for entity 0 no longer raises KeyError.
    if 0 in bert_dict:
        template = bert_dict[0]
    elif bert_dict:
        template = next(iter(bert_dict.values()))
    else:
        raise ValueError(
            "no description embeddings loaded from %s" % args.desc)
    missing = np.zeros_like(template)  # built once, reused for every gap

    bert_embd = np.array([
        bert_dict[i] if i in bert_dict else missing
        for i in range(e_num)
    ])
    embd = np.concatenate([0.8 * graph_embd, 0.2 * bert_embd], axis=1)
    get_hits(embd, ill)
def training(output_layer, loss, learning_rate, epochs, ILL, e, k, test):
    """Minimise ``loss`` with Adam and return the final output embeddings.

    Args:
        output_layer: tensor evaluated with ``sess.run`` to obtain the
            current embeddings.
        loss: tensor passed to ``AdamOptimizer.minimize``.
        learning_rate: learning rate given to ``tf.train.AdamOptimizer``.
        epochs: number of optimisation steps (one ``sess.run`` of the train
            op per epoch).
        ILL: sequence of aligned pairs; converted to an array whose columns
            0 and 1 are used.
        e: not used by this function.
        k: number of negatives per aligned pair.
        test: forwarded to ``get_hits`` every 10th epoch.

    Returns:
        ``(outvec, J)`` where ``outvec`` is ``output_layer`` evaluated after
        the last epoch and ``J`` is the list of loss values recorded on every
        10th epoch.
    """
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    print('initializing...')
    init = tf.global_variables_initializer()
    sess = tf.Session()
    # try/finally so the session is released even when sampling, evaluation
    # or a training step raises.
    try:
        sess.run(init)
        print('running...')
        J = []
        t = len(ILL)
        ILL = np.array(ILL)
        # Each id of column 0 / column 1 repeated k times, flattened to
        # length t * k, to line up with the k negatives sampled per pair.
        L = np.ones((t, k)) * (ILL[:, 0].reshape((t, 1)))
        neg_left = L.reshape((t * k, ))
        L = np.ones((t, k)) * (ILL[:, 1].reshape((t, 1)))
        neg2_right = L.reshape((t * k, ))
        for i in range(epochs):
            if i % 10 == 0:
                # Refresh the sampled negatives from the current embeddings.
                out = sess.run(output_layer)
                neg2_left = get_neg(ILL[:, 1], out, k)
                neg_right = get_neg(ILL[:, 0], out, k)
                feeddict = {
                    "neg_left:0": neg_left,
                    "neg_right:0": neg_right,
                    "neg2_left:0": neg2_left,
                    "neg2_right:0": neg2_right
                }
            _, th = sess.run([train_step, loss], feed_dict=feeddict)
            if i % 10 == 0:
                th, outvec = sess.run([loss, output_layer],
                                      feed_dict=feeddict)
                J.append(th)
                get_hits(outvec, test)
            print('%d/%d' % (i + 1, epochs), 'epochs...', th)
        outvec = sess.run(output_layer)
    finally:
        sess.close()
    return outvec, J
def training(output_h, loss_pre, loss_all, learning_rate, epochs, pre_epochs,
             ILL, e, k, save_suffix, dimension, train_batchnum, test, M0, e1,
             e2, KG, rel_type, output_r, l1, r_num, ILL_r):
    """Two-phase training followed by iterative evaluation on the last epoch.

    Epochs ``< pre_epochs`` minimise ``loss_pre`` on batches of aligned
    pairs; later epochs minimise ``loss_all`` and additionally feed a batch
    of triples from ``KG``.  A checkpoint is written after epoch
    ``pre_epochs - 1`` under ``RNM_model/``; if that checkpoint already
    exists it is restored and training resumes at ``pre_epochs``.

    Args:
        output_h: tensor evaluated to obtain entity embeddings.
        loss_pre: loss minimised during the first ``pre_epochs`` epochs.
        loss_all: loss minimised afterwards.
        learning_rate: learning rate for both Adam optimisers.
        epochs: total number of epochs.
        pre_epochs: length of the first phase; also the period at which
            negatives are re-sampled (used as a modulus, so must be non-zero).
        ILL: aligned pairs; converted to an array, columns 0 and 1 are used.
        k: number of negatives per aligned pair.
        save_suffix: suffix inserted into the checkpoint file name.
        train_batchnum: number of batches per epoch.
        test, l1, rel_type, ILL_r: forwarded to the evaluation helpers.
        KG: iterable of triples; items 0, 1, 2 are fed as head / rel / tail.
        output_r: tensor evaluated together with ``output_h`` for testing.
        e, dimension, M0, e1, e2, r_num: not used by this function.

    Returns:
        None.
    """
    from include.Test import get_hits, get_rel_hits

    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_pre)
    train_all = tf.train.AdamOptimizer(learning_rate).minimize(loss_all)
    print('initializing...')
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess = tf.Session()
    # try/finally so the session is released even if a step or the
    # evaluation raises.
    try:
        sess.run(init)
        print('running...')
        ILL = np.array(ILL)
        t = len(ILL)
        # Each id of column 0 / column 1 repeated k times (length t * k).
        neg_left = (np.ones((t, k)) * ILL[:, 0].reshape((t, 1))).reshape(
            (t * k, ))
        neg2_right = (np.ones((t, k)) * ILL[:, 1].reshape((t, 1))).reshape(
            (t * k, ))

        kg_tri = np.array([[tri[0], tri[1], tri[2]] for tri in KG])
        tri_num = len(kg_tri)

        dn = 'RNM'
        model_dir = dn + '_' + "model/"
        ckpt_path = dn + '_' + "model/save_" + save_suffix + ".ckpt"
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        if os.path.exists(ckpt_path + ".meta"):
            # A finished first phase is on disk: skip straight to phase two.
            saver.restore(sess, ckpt_path)
            start_epoch = pre_epochs
        else:
            start_epoch = 0

        for i in range(start_epoch, epochs):
            if i % pre_epochs == 0:
                out = sess.run(output_h)
                print('data preparation')
                neg2_left = get_neg(ILL[:, 1], out, k, train_batchnum)
                neg_right = get_neg(ILL[:, 0], out, k, train_batchnum)

            for j in range(train_batchnum):
                last_batch = j == train_batchnum - 1
                beg = int(t / train_batchnum * j)
                # The last batch absorbs the rounding remainder.
                end = t if last_batch else int(t / train_batchnum * (j + 1))
                feeddict = {
                    "ILL:0": ILL[beg:end],
                    "neg_left:0":
                        neg_left.reshape((t, k))[beg:end].reshape((-1, )),
                    "neg_right:0":
                        neg_right.reshape((t, k))[beg:end].reshape((-1, )),
                    "neg2_left:0":
                        neg2_left.reshape((t, k))[beg:end].reshape((-1, )),
                    "neg2_right:0":
                        neg2_right.reshape((t, k))[beg:end].reshape((-1, )),
                }
                if i < pre_epochs:
                    sess.run([train_step], feed_dict=feeddict)
                else:
                    # Second phase: the same batch index also selects a
                    # slice of the triples.
                    tri_beg = int(tri_num / train_batchnum * j)
                    if last_batch:
                        tri_end = tri_num
                    else:
                        tri_end = int(tri_num / train_batchnum * (j + 1))
                    feeddict["head:0"] = kg_tri[tri_beg:tri_end, 0]
                    feeddict["rel:0"] = kg_tri[tri_beg:tri_end, 1]
                    feeddict["tail:0"] = kg_tri[tri_beg:tri_end, 2]
                    sess.run([train_all], feed_dict=feeddict)

            if (i + 1) % 10 == 0 or i == 0:
                print('%d/%d' % (i + 1, epochs), 'epochs...')
            if i == pre_epochs - 1:
                save_path = saver.save(sess, ckpt_path)
                print("Save to path: ", save_path)
            if i == epochs - 1:
                print('Testing')
                iters = 3
                outvec, outvec_r = sess.run([output_h, output_r])
                print('iter: 1')
                sim_e, sim_r = get_hits(outvec, outvec_r, l1, KG, ILL,
                                        rel_type, test, None, None)
                # Loop variable deliberately not named ``t``: that name holds
                # the number of aligned pairs used by the batching above.
                for it in range(iters):
                    print('iter: ' + str(it + 2))
                    sim_e, sim_r = get_hits(outvec, outvec_r, l1, KG, ILL,
                                            rel_type, test, sim_e, sim_r)
                get_rel_hits(outvec, outvec_r, l1, KG, ILL, rel_type, test,
                             ILL_r)
    finally:
        sess.close()
    return
warnings.filterwarnings("ignore")

''' Follow the code style of GCN-Align: https://github.com/1049451037/GCN-Align '''

# Seed both NumPy and TensorFlow with the same fixed value.
seed = 12306
np.random.seed(seed)
tf.set_random_seed(seed)

if __name__ == '__main__':
    # Number of distinct entities across both entity files.
    e = len(set(loadfile(Config.e1, 1)).union(loadfile(Config.e2, 1)))

    # Shuffle the aligned pairs, then cut them into train / test at
    # Config.seed tenths of the data.
    ILL = loadfile(Config.ill, 2)
    illL = len(ILL)
    np.random.shuffle(ILL)
    split_at = illL // 10 * Config.seed
    train = np.array(ILL[:split_at])
    test = ILL[split_at:]

    KG1 = loadfile(Config.kg1, 3)
    KG2 = loadfile(Config.kg2, 3)

    output_layer, loss = build(
        Config.dim,
        Config.act_func,
        Config.alpha,
        Config.beta,
        Config.gamma,
        Config.k,
        Config.language[:2],
        e,
        train,
        KG1 + KG2,
    )
    vec, J = training(
        output_layer, loss, 0.001, Config.epochs, train, e, Config.k, test)
    print('loss:', J)
    print('Result:')
    get_hits(vec, test)
def training(output_h, output_h_match, loss_all, sample_w, loss_w,
             learning_rate, epochs, pre_epochs, ILL, e, k, sampled_nbr_num,
             save_suffix, dimension, dimension_g, c, train_batchnum,
             test_batchnum, test, M0, e1, e2, nbr_all, mask_all):
    """Train with periodic re-sampling, evaluation and sampling-weight updates.

    Every epoch minimises ``loss_all`` over ``train_batchnum`` batches of
    aligned pairs, with ``alpha:0`` fed as 0 before ``pre_epochs`` and 1
    afterwards.  Negatives, candidates and sampled neighbourhoods are
    refreshed every 50 epochs (and on the first epoch actually run).  Every
    10 epochs the embeddings are evaluated; from ``pre_epochs`` on, the
    matching output is evaluated batch-wise as well.  On epochs with
    ``i % 50 == 49`` after ``pre_epochs`` the sampling matrix ``sample_w`` is
    trained on random subsets of the pairs.  A checkpoint is written after
    epoch ``pre_epochs - 1`` under ``model/`` and restored when present.

    Args:
        output_h: tensor evaluated to obtain entity embeddings.
        output_h_match: tensor evaluated per test batch; its result is
            reshaped to ``(2, -1, dimension + dimension_g)``.
        loss_all: loss minimised by the main train op.
        sample_w: variable optimised by the sampling train op.
        loss_w: loss minimised w.r.t. ``sample_w`` only.
        learning_rate: learning rate for both Adam optimisers.
        epochs: total number of epochs.
        pre_epochs: epoch at which ``alpha:0`` switches from 0 to 1.
        ILL: aligned pairs; converted to an array, columns 0 and 1 are used.
        e, e1, e2: forwarded to ``mask_candidate`` (``e`` also to the
            neighbourhood helpers).
        k: number of negatives per aligned pair.
        sampled_nbr_num: forwarded to ``get_nbr`` / ``sample_nbr``.
        save_suffix: suffix inserted into the checkpoint file name.
        dimension: size of the identity matrix used as the initial sampling
            matrix.
        c: number of candidates per pair during training.
        train_batchnum, test_batchnum: number of train / test batches.
        test: test pairs, sliced per batch and fed as ``ILL:0``.
        M0: forwarded to ``get_nbr``.
        nbr_all, mask_all: forwarded to ``sample_nbr``.

    Returns:
        ``(outvec, J)``: ``outvec`` is ``output_h`` from the most recent
        evaluation epoch (or, if no evaluation epoch ran, evaluated once
        after the loop); ``J`` is a list that this function never appends
        to.
    """
    from include.Test import get_hits, get_hits_new

    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_all)
    train_step_w = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_w, var_list=[sample_w])
    print('initializing...')
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess = tf.Session()
    # try/finally so the session is released even if a step or the
    # evaluation raises.
    try:
        sess.run(init)
        print('running...')
        J = []
        outvec = None  # set on evaluation epochs; see fallback after loop
        ILL = np.array(ILL)
        t = len(ILL)
        # Each id of column 0 / column 1 repeated k times (length t * k).
        neg_left = (np.ones((t, k)) * ILL[:, 0].reshape((t, 1))).reshape(
            (t * k,))
        neg2_right = (np.ones((t, k)) * ILL[:, 1].reshape((t, 1))).reshape(
            (t * k,))

        nbr_sampled, mask_sampled = get_nbr(M0, e, sampled_nbr_num)
        mask_e1, mask_e2 = mask_candidate(e, e1, e2)
        sample_w_vec = np.identity(dimension)
        test_can_num = 50

        ckpt_path = "model/save_" + save_suffix + ".ckpt"
        if not os.path.exists("model/"):
            os.makedirs("model/")
        if os.path.exists(ckpt_path + ".meta"):
            saver.restore(sess, ckpt_path)
            start_epoch = pre_epochs
        else:
            start_epoch = 0

        for i in range(start_epoch, epochs):
            # Also refresh on the first epoch actually run: when resuming
            # from a checkpoint, start_epoch need not be a multiple of 50
            # and the feed dict would otherwise be used before assignment.
            if i % 50 == 0 or i == start_epoch:
                out = sess.run(output_h)
                print('get negative pairs')
                neg2_left = get_neg(ILL[:, 1], out, k, train_batchnum)
                neg_right = get_neg(ILL[:, 0], out, k, train_batchnum)
                print('sample candidates')
                c_left = sample_candidate(
                    ILL[:, 1], ILL[:, 0], out, c, mask_e2, train_batchnum)
                c_right = sample_candidate(
                    ILL[:, 0], ILL[:, 1], out, c, mask_e1, train_batchnum)
                candidate = np.reshape(
                    np.concatenate((c_right, c_left), axis=0),
                    (2, len(ILL), c))
                print('sample neighborhood')
                nbr_sampled, mask_sampled = sample_nbr(
                    out, nbr_all, mask_all, e, sampled_nbr_num,
                    sample_w_vec, test_batchnum)
                feeddict = {"ILL:0": ILL,
                            "candidate:0": candidate.reshape((-1,)),
                            "neg_left:0": neg_left,
                            "neg_right:0": neg_right,
                            "neg2_left:0": neg2_left,
                            "neg2_right:0": neg2_right,
                            "nbr_sampled:0": nbr_sampled,
                            "mask_sampled:0": mask_sampled,
                            "c:0": c}

            feeddict["alpha:0"] = 0 if i < pre_epochs else 1

            for j in range(train_batchnum):
                beg = int(t / train_batchnum * j)
                # The last batch absorbs the rounding remainder.
                if j == train_batchnum - 1:
                    end = t
                else:
                    end = int(t / train_batchnum * (j + 1))
                feeddict["ILL:0"] = ILL[beg:end]
                feeddict["candidate:0"] = candidate[:, beg:end].reshape(
                    (-1,))
                feeddict["neg_left:0"] = neg_left.reshape(
                    (t, k))[beg:end].reshape((-1,))
                feeddict["neg_right:0"] = neg_right.reshape(
                    (t, k))[beg:end].reshape((-1,))
                feeddict["neg2_left:0"] = neg2_left.reshape(
                    (t, k))[beg:end].reshape((-1,))
                feeddict["neg2_right:0"] = neg2_right.reshape(
                    (t, k))[beg:end].reshape((-1,))
                sess.run([train_step], feed_dict=feeddict)

            if i == pre_epochs - 1:
                save_path = saver.save(sess, ckpt_path)
                print("Save to path: ", save_path)

            if i % 10 == 0:
                print('%d/%d' % (i + 1, epochs), 'epochs...')
                outvec = sess.run(output_h, feed_dict=feeddict)
                test_can = get_hits(outvec, test, test_can_num)
                if i >= pre_epochs:
                    n_test = len(test)
                    match_parts = []
                    for j in range(test_batchnum):
                        beg = int(n_test / test_batchnum * j)
                        if j == test_batchnum - 1:
                            end = n_test
                        else:
                            end = int(n_test / test_batchnum * (j + 1))
                        feeddict_test = {
                            "ILL:0": test[beg:end],
                            "candidate:0":
                                test_can[:, beg:end].reshape((-1,)),
                            "nbr_sampled:0": nbr_sampled,
                            "mask_sampled:0": mask_sampled,
                            "c:0": test_can_num}
                        outvec_h_match = sess.run(
                            output_h_match, feed_dict=feeddict_test)
                        match_parts.append(outvec_h_match.reshape(
                            (2, -1, dimension + dimension_g)))
                    # Join the per-batch results once along the pair axis.
                    outvec_h_match_all = np.concatenate(match_parts, axis=1)
                    get_hits_new(outvec_h_match_all, test_can, test,
                                 test_can_num)

            if i >= pre_epochs and i % 50 == 49:
                print('train sample w')
                for _round in range(10):
                    select_train = np.random.choice(len(ILL), 10)
                    feeddict["select_train:0"] = select_train
                    for _step in range(5):
                        _, thw = sess.run([train_step_w, loss_w],
                                          feed_dict=feeddict)
                    print(thw)
                sample_w_vec = sess.run(sample_w, feed_dict=feeddict)

        if outvec is None:
            # No evaluation epoch ran (e.g. resumed with nothing left to
            # train): still return embeddings instead of failing on an
            # unbound name.
            outvec = sess.run(output_h)
    finally:
        sess.close()
    return outvec, J
print(e)

# Shuffle the aligned pairs, then cut them into train / test at Config.seed
# tenths of the data.
ILL = loadfile(Config.ill, 2)
illL = len(ILL)
np.random.shuffle(ILL)
split_at = illL // 10 * Config.seed
train = np.array(ILL[:split_at])
test = ILL[split_at:]

KG1 = loadfile(Config.kg1, 3)
KG2 = loadfile(Config.kg2, 3)

# build SE
output_layer, loss = build_SE(
    Config.se_dim,
    Config.act_func,
    Config.gamma,
    Config.k,
    e,
    train,
    KG1 + KG2,
)
se_vec, J = training(
    output_layer, loss, 25, Config.epochs_se, train, e, Config.k)
print('loss:', J)
print('Result of SE:')
get_hits(se_vec, test)

# Build and train the attribute model on the same training pairs.
ent2id = get_ent2id([Config.e1, Config.e2])
attr = loadattr([Config.a1, Config.a2], e, ent2id)
output_layer, loss = build_AE(
    attr,
    Config.ae_dim,
    Config.act_func,
    Config.gamma,
    Config.k,
    e,
    train,
    KG1 + KG2,
)
ae_vec, J = training(
    output_layer, loss, 5, Config.epochs_ae, train, e, Config.k)
print('loss:', J)
# print('Result of AE:')
# get_hits(ae_vec, test)
# np.save('se_vec.npy', se_vec)  # save embeddings
# np.save('ae_vec.npy', ae_vec)
# print(se_vec.shape, ae_vec.shape)
# print('embeddings are saved.')

# Evaluate the two embedding sets together, weighted by Config.beta.
print('Result of SE+AE:')
get_combine_hits(se_vec, ae_vec, Config.beta, test)
import os  # local to this script section; needed for the checkpoint dir

print(e)
ILL = loadfile(Config.ill, 2)
illL = len(ILL)

# Train on the shuffled train split followed by the shuffled dev split.
train = loadfile(Config.tr, 2)
dev = loadfile(Config.dev, 2)
np.random.shuffle(train)
np.random.shuffle(dev)
train = np.array(train + dev)
test = loadfile(Config.te, 2)

KG1 = loadfile(Config.kg1, 3)
KG2 = loadfile(Config.kg2, 3)
ent2id = get_ent2id([Config.e1, Config.e2])
# attr
attr = load_attr([Config.a1, Config.a2], e, ent2id, Config.attr_num)
# rel
rel = load_relation(e, KG1 + KG2, Config.rel_num)

# Make sure the output directory exists before the long training run, so
# the embeddings are not lost to a missing directory at the very end.
if Config.ckpt:
    os.makedirs(Config.ckpt, exist_ok=True)

# Both variants take the same arguments; only the builder differs.
if HYBRID:
    print("running HMAN...")
    build_model = build_HMAN
else:
    print("running MAN...")
    build_model = build_MAN
output_layer, loss = build_model(
    Config.se_dim, Config.act_func, Config.gamma, Config.k,
    e, train, KG1 + KG2, attr, Config.ae_dim, rel, Config.rel_dim)

graph_embd, J = training(output_layer, loss, 25, Config.epochs, train, e,
                         Config.k, test)
get_hits(graph_embd, test)

with open(Config.ckpt + "/%s_graph_embd.pkl" % Config.language, "wb") as f:
    pickle.dump(graph_embd, f)