def eval_alignment_mul(sim_mat, top_k, mess=""):
    """Rank the rows of ``sim_mat`` in worker processes and print the metrics.

    The row indices ``0 .. sim_mat.shape[0] - 1`` are split by ``div_list``
    into ``P.nums_threads`` groups. Each group, together with the matching
    rows of ``sim_mat``, is submitted to ``cal_rank`` on a process pool that
    has as many workers as there are groups. The ``(mean, mrr, num)`` triples
    returned by the workers are summed and divided by the number of rows.

    Args:
        sim_mat: array indexed as ``sim_mat[rows, :]``; ``shape[0]`` is used
            as the number of evaluated rows.
        top_k: iterable of cut-offs, forwarded unchanged to ``cal_rank``.
        mess: label printed in front of the metrics.

    Returns:
        None. The hits@k ratios (rounded to 4 digits), the mean rank, the
        mean reciprocal rank and the elapsed time are only printed.
    """
    t = time.time()
    ref_num = sim_mat.shape[0]
    hit_counts = [0 for _ in top_k]
    rank_total = 0
    rr_total = 0

    row_groups = div_list(np.array(range(ref_num)), P.nums_threads)
    pool = multiprocessing.Pool(processes=len(row_groups))
    pending = [
        pool.apply_async(cal_rank, (rows, sim_mat[rows, :], top_k))
        for rows in row_groups
    ]
    pool.close()
    pool.join()

    for job in pending:
        mean, mrr, num = job.get()
        rank_total += mean
        rr_total += mrr
        # list += ndarray is dispatched to NumPy and yields an element-wise sum.
        hit_counts += np.array(num)

    acc = np.array(hit_counts) / ref_num
    for idx, ratio in enumerate(acc):
        acc[idx] = round(ratio, 4)
    rank_total /= ref_num
    rr_total /= ref_num

    report = "{}, hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
    print(report.format(mess, top_k, acc, rank_total, rr_total, time.time() - t))
def generate_neighbours_multi_embed(embed, ent_list, k):
    """Run ``cal_neighbours_embed`` over fragments of ``ent_list`` in parallel.

    ``ent_list`` and the positions ``0 .. len(ent_list) - 1`` are both split
    by ``ut.div_list`` into ``P.nums_threads`` fragments, so fragment ``i`` of
    the entities is paired with rows ``embed[positions_i, :]``. Every pair is
    submitted to a process pool; the dictionaries returned by the workers are
    combined with ``ut.merge_dic``.

    Args:
        embed: array indexed as ``embed[rows, :]``; rows are addressed by
            position in ``ent_list``.
        ent_list: sequence of entities.
        k: forwarded unchanged to ``cal_neighbours_embed``
            (presumably the number of neighbours to keep -- confirm there).

    Returns:
        The merged dictionary of all worker results.
    """
    entity_parts = ut.div_list(np.array(ent_list), P.nums_threads)
    position_parts = ut.div_list(np.array(range(len(ent_list))), P.nums_threads)

    pool = multiprocessing.Pool(processes=len(entity_parts))
    pending = []
    for part_no, entities in enumerate(entity_parts):
        rows = position_parts[part_no]
        job_args = (entities, np.array(ent_list), embed[rows, :], embed, k)
        pending.append(pool.apply_async(cal_neighbours_embed, job_args))
    pool.close()
    pool.join()

    merged = dict()
    for job in pending:
        merged = ut.merge_dic(merged, job.get())

    # Drop the local reference before forcing a collection pass.
    del embed
    gc.collect()
    return merged
def train_tris_1epo(model, triples1, triples2, nbours1, nbours2, nums_neg, batch_size, nums_threads_batch):
    """Train the triple loss for one epoch using batches built in background processes.

    The number of steps is ``ceil((triples1.triples_num + triples2.triples_num) / batch_size)``.
    The step indices are split by ``ut.div_list`` into ``nums_threads_batch``
    groups, and one ``generate_batch_via_neighbour_no_pair_queue`` process is
    started per group; all of them write to one shared queue. The main
    process then takes one ``(positive, negative)`` batch from the queue per
    step and runs ``model.triple_loss`` / ``model.triple_optimizer`` on it.

    Args:
        model: object exposing ``session``, ``triple_loss``, ``triple_optimizer``
            and the ``pos_*`` / ``neg_*`` feed keys.
        triples1, triples2: objects exposing ``triples_num`` and ``triple_list``;
            both ``triple_list`` attributes are shuffled in place at the end.
        nbours1, nbours2: forwarded to the batch generator.
        nums_neg: forwarded to the batch generator as ``multi``.
        batch_size: number of triples per step.
        nums_threads_batch: number of generator processes.

    Returns:
        ``(loss, seconds)``: the loss summed over all steps divided by the
        step count, and the elapsed wall time rounded to two decimals.

    Raises:
        AssertionError: if ``ut.div_list`` does not return exactly
            ``nums_threads_batch`` groups.
    """
    epoch_loss = 0
    start = time.time()
    triples_num = triples1.triples_num + triples2.triples_num
    triple_steps = int(math.ceil(triples_num / batch_size))
    step_groups = ut.div_list(list(range(triple_steps)), nums_threads_batch)
    assert len(step_groups) == nums_threads_batch

    batch_queue = mp.Queue()
    # NOTE(review): the producers are started but never joined here -- confirm
    # that this is intentional.
    for group in step_groups:
        producer = mp.Process(
            target=generate_batch_via_neighbour_no_pair_queue,
            kwargs={'que': batch_queue,
                    'triples1': triples1,
                    "triples2": triples2,
                    "steps": group,
                    "batch_size": batch_size,
                    "nbours1": nbours1,
                    "nbours2": nbours2,
                    "multi": nums_neg},
        )
        producer.start()

    def column(batch, position):
        # Pull one component out of every triple in the batch.
        return [triple[position] for triple in batch]

    for _ in range(triple_steps):
        fetches = {"loss": model.triple_loss, "train_op": model.triple_optimizer}
        batch_pos, batch_neg = batch_queue.get()
        triple_feed_dict = {
            model.pos_hs: column(batch_pos, 0),
            model.pos_rs: column(batch_pos, 1),
            model.pos_ts: column(batch_pos, 2),
            model.neg_hs: column(batch_neg, 0),
            model.neg_rs: column(batch_neg, 1),
            model.neg_ts: column(batch_neg, 2),
        }
        vals = model.session.run(fetches=fetches, feed_dict=triple_feed_dict)
        epoch_loss += vals["loss"]

    epoch_loss /= triple_steps
    random.shuffle(triples1.triple_list)
    random.shuffle(triples2.triple_list)
    end = time.time()
    return epoch_loss, round(end - start, 2)
def generate_neighbours_multi_embed(embed, ent_list, k, nums_threads):
    """Run ``cal_neighbours_embed`` over fragments of ``ent_list`` in parallel.

    ``ent_list`` and the positions ``0 .. len(ent_list) - 1`` are both split
    by ``ut.div_list`` into ``nums_threads`` fragments, so fragment ``i`` of
    the entities is paired with rows ``embed[positions_i, :]``. Every pair is
    submitted to a process pool; the dictionaries returned by the workers are
    combined with ``ut.merge_dic``.

    Args:
        embed: array indexed as ``embed[rows, :]``; rows are addressed by
            position in ``ent_list``.
        ent_list: sequence of entities.
        k: forwarded unchanged to ``cal_neighbours_embed``
            (presumably the number of neighbours to keep -- confirm there).
        nums_threads: number of fragments requested from ``ut.div_list``.

    Returns:
        The merged dictionary of all worker results.
    """
    ent_frags = ut.div_list(np.array(ent_list), nums_threads)
    ent_frag_indexes = ut.div_list(np.array(range(len(ent_list))), nums_threads)

    # Built once and reused for every task instead of once per fragment.
    all_ents = np.array(ent_list)

    pool = multiprocessing.Pool(processes=len(ent_frags))
    results = list()
    for i, frag in enumerate(ent_frags):
        results.append(pool.apply_async(
            cal_neighbours_embed,
            (frag, all_ents, embed[ent_frag_indexes[i], :], embed, k)))
    pool.close()
    pool.join()

    dic = dict()
    for res in results:
        dic = ut.merge_dic(dic, res.get())

    # Drop the local reference before forcing a collection pass.
    del embed
    gc.collect()
    return dic
def eval_alignment_by_sim_mat(embed1, embed2, top_k, csls=0, accurate=False):
    """Evaluate alignment from the matrix produced by ``sim_handler``.

    ``sim_handler(embed1, embed2, csls)`` builds the matrix. Its row indices
    are split by ``div_list`` into ``P.nums_threads`` groups and each group is
    ranked by ``cal_rank_by_sim_mat`` on a process pool. The per-group
    ``(mean, mrr, num, prec_set)`` results are summed / unioned, and the
    totals are divided by the number of rows.

    Args:
        embed1, embed2: forwarded to ``sim_handler``.
        top_k: iterable of cut-offs, forwarded to ``cal_rank_by_sim_mat``.
        csls: forwarded to ``sim_handler``; also printed when greater than 0.
        accurate: forwarded to ``cal_rank_by_sim_mat``; selects the long
            report line (with mr / mrr) instead of the short one.

    Returns:
        ``(top_k, acc, mean_rank, mrr, seconds)`` where ``acc`` holds the
        hits@k percentages rounded to two decimals.

    Raises:
        AssertionError: if the union of the returned ``prec_set`` values does
            not contain exactly one entry per row.
    """
    t = time.time()
    sim_mat = sim_handler(embed1, embed2, csls)
    print("*******************sim_mat*****************")

    ref_num = sim_mat.shape[0]
    hit_counts = [0 for _ in top_k]
    rank_total = 0
    rr_total = 0
    aligned = set()

    row_groups = div_list(np.array(range(ref_num)), P.nums_threads)
    pool = multiprocessing.Pool(processes=len(row_groups))
    pending = [
        pool.apply_async(cal_rank_by_sim_mat, (rows, sim_mat[rows, :], top_k, accurate))
        for rows in row_groups
    ]
    pool.close()
    pool.join()

    for job in pending:
        mean, mrr, num, prec_set = job.get()
        rank_total += mean
        rr_total += mrr
        # list += ndarray is dispatched to NumPy and yields an element-wise sum.
        hit_counts += np.array(num)
        aligned |= prec_set
    assert len(aligned) == ref_num

    acc = np.array(hit_counts) / ref_num * 100
    for idx, percentage in enumerate(acc):
        acc[idx] = round(percentage, 2)
    rank_total /= ref_num
    rr_total /= ref_num

    if csls > 0:
        print("csls = {}".format(csls))
    if not accurate:
        print("hits@{} = {}, time = {:.3f} s ".format(top_k, acc, time.time() - t))
    else:
        print(
            "accurate results: hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
            .format(top_k, acc, rank_total, rr_total, time.time() - t))

    hits1 = acc[0]  # retained from the original; not used below
    del sim_mat
    gc.collect()
    return top_k, acc, rank_total, rr_total, time.time() - t
def CSLS_sim(sim_mat1, k, nums_threads):
    """Apply ``cal_csls_sim`` to row chunks of ``sim_mat1`` in parallel.

    The row indices are split by ``div_list`` into ``nums_threads`` chunks.
    Each chunk ``sim_mat1[rows, :]`` is processed by ``cal_csls_sim`` on a
    process pool, and the chunk results are joined in submission order with
    ``np.append``.

    NOTE(review): the single-process code this replaced (previously kept as a
    comment) took the mean of each row's ``k`` largest entries; presumably
    ``cal_csls_sim`` does the same per chunk -- confirm there.

    Args:
        sim_mat1: array indexed as ``sim_mat1[rows, :]``.
        k: forwarded unchanged to ``cal_csls_sim``.
        nums_threads: number of chunks requested from ``div_list``.

    Returns:
        The joined chunk results; the first dimension equals
        ``sim_mat1.shape[0]``.

    Raises:
        AssertionError: if the joined result does not have one entry per row.
    """
    row_chunks = div_list(np.array(range(sim_mat1.shape[0])), nums_threads)
    pool = multiprocessing.Pool(processes=len(row_chunks))
    pending = [
        pool.apply_async(cal_csls_sim, (sim_mat1[rows, :], k))
        for rows in row_chunks
    ]
    pool.close()
    pool.join()

    sim_values = None
    for job in pending:
        part = job.get()
        # The first chunk is taken as-is; later chunks are appended to it.
        sim_values = part if sim_values is None else np.append(sim_values, part)

    assert sim_values.shape[0] == sim_mat1.shape[0]
    return sim_values
from utils import div_list
import tensorflow as tf
import numpy as np
from train import Training

if __name__ == "__main__":
    # Build the model.
    # NOTE(review): this happens before the seeds below are set; if Training()
    # draws random numbers while it is constructed, those draws are not
    # covered by the seed -- confirm.
    gcn = Training()

    # Seed NumPy and TensorFlow.
    seed = 123
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    # Only the number of rows of the loaded matrix is used: the row indices
    # are shuffled and split into `cv_num` folds.
    labels = np.loadtxt("data/adj.txt")
    reorder = np.arange(labels.shape[0])
    np.random.shuffle(reorder)

    cv_num = 5
    order = div_list(reorder.tolist(), cv_num)

    for fold in range(cv_num):
        print("cross_validation:", '%01d' % (fold))
        # The current fold is the test split; every other index is shuffled
        # and used for training.
        test_arr = order[fold]
        train_arr = list(set(reorder).difference(set(test_arr)))
        np.random.shuffle(train_arr)
        scores = gcn.train(train_arr, test_arr)
def eval_alignment_by_div_embed(embed1, embed2, top_k, nums_threads, selected_pairs=None, accurate=False,
                                is_euclidean=False):
    """Evaluate alignment by ranking fragments of ``embed1`` against ``embed2``.

    The row indices of ``embed1`` are split by ``div_list`` into
    ``nums_threads`` fragments. Each fragment is ranked against the whole of
    ``embed2`` by ``cal_rank_by_div_embed`` on a process pool. Every worker
    returns two sets of metrics, ``(mean, mrr, num)`` and
    ``(mean1, mrr1, num1)``, plus a ``prec_set``; all of them are accumulated
    over the fragments.

    The first set is always printed. The second set is printed (and supplies
    the returned hits@1) only when ``selected_pairs`` is non-empty.
    NOTE(review): presumably the second set is the one computed with respect
    to ``selected_pairs`` -- confirm in ``cal_rank_by_div_embed``.

    Args:
        embed1: array indexed as ``embed1[rows, :]``; ``shape[0]`` is the
            number of evaluated rows.
        embed2: forwarded whole to every worker.
        top_k: iterable of cut-offs, forwarded to the workers.
        nums_threads: number of fragments requested from ``div_list``.
        selected_pairs: optional collection of ``(i, j)`` pairs; converted to
            a dict ``i -> j`` and forwarded to the workers (``None`` when
            empty or missing).
        accurate: forwarded to the workers; selects the long report line
            (with mr / mrr) instead of the short one.
        is_euclidean: forwarded unchanged to the workers.

    Returns:
        ``(prec_set, hits1)``: the union of the workers' ``prec_set`` values
        and the first hits@k percentage -- taken from the second metric set
        when ``selected_pairs`` is non-empty, otherwise from the first.

    Raises:
        AssertionError: if ``selected_pairs`` repeats a first element, or if
            the union of ``prec_set`` values does not contain exactly one
            entry per row of ``embed1``.
    """

    def pair2dic(pairs):
        # Map the first element of each pair to the second; the first
        # occurrence of a key wins and duplicates are rejected by the assert.
        if pairs is None or len(pairs) == 0:
            return None
        dic = dict()
        for i, j in pairs:
            if i not in dic:
                dic[i] = j
        assert len(dic) == len(pairs)
        return dic

    long_report = "accurate results: hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
    short_report = "hits@{} = {}, time = {:.3f} s "

    t = time.time()
    dic = pair2dic(selected_pairs)
    ref_num = embed1.shape[0]
    t_num = np.array([0 for _ in top_k])
    t_mean = 0
    t_mrr = 0
    t_num1 = np.array([0 for _ in top_k])
    t_mean1 = 0
    t_mrr1 = 0
    t_prec_set = set()

    frags = div_list(np.array(range(ref_num)), nums_threads)
    pool = multiprocessing.Pool(processes=len(frags))
    reses = list()
    for frag in frags:
        reses.append(
            pool.apply_async(cal_rank_by_div_embed,
                             (frag, dic, embed1[frag, :], embed2, top_k, accurate, is_euclidean)))
    pool.close()
    pool.join()

    for res in reses:
        mean, mrr, num, mean1, mrr1, num1, prec_set = res.get()
        t_mean += mean
        t_mrr += mrr
        t_num += num
        t_mean1 += mean1
        t_mrr1 += mrr1
        t_num1 += num1
        t_prec_set |= prec_set
    assert len(t_prec_set) == ref_num

    acc = t_num / ref_num * 100
    for i, value in enumerate(acc):
        acc[i] = round(value, 2)
    t_mean /= ref_num
    t_mrr /= ref_num
    if accurate:
        print(long_report.format(top_k, acc, t_mean, t_mrr, time.time() - t))
    else:
        print(short_report.format(top_k, acc, time.time() - t))
    hits1 = acc[0]

    if selected_pairs is not None and len(selected_pairs) > 0:
        acc1 = t_num1 / ref_num * 100
        for i, value in enumerate(acc1):
            acc1[i] = round(value, 2)
        t_mean1 /= ref_num
        t_mrr1 /= ref_num
        hits1 = acc1[0]
        # Report the second metric set here. The previous code printed
        # acc / t_mean / t_mrr again, so these values were never shown.
        if accurate:
            print(long_report.format(top_k, acc1, t_mean1, t_mrr1, time.time() - t))
        else:
            print(short_report.format(top_k, acc1, time.time() - t))

    gc.collect()
    return t_prec_set, hits1
def eval_alignment_by_mcd_sim_mat(embed1, embed2, top_k, csls=0, accurate=False):
    """Evaluate alignment on a matrix derived from the ``sim_handler`` output.

    With ``S = sim_handler(embed1, embed2, csls)`` of shape ``(n, m)``, every
    cell is replaced by its squared deviation from the mean of its own row
    and column taken together::

        mu[i, j]  = (sum(S[i, :]) + sum(S[:, j]) - S[i, j]) / (n + m - 1)
        mcd[i, j] = (S[i, j] - mu[i, j]) ** 2

    The rows of ``mcd`` are then split by ``div_list`` into ``P.nums_threads``
    groups and ranked by ``cal_rank_by_sim_mat`` on a process pool; the
    per-group ``(mean, mrr, num, prec_set)`` results are summed / unioned and
    the totals are divided by the number of rows.

    Args:
        embed1, embed2: forwarded to ``sim_handler``.
        top_k: iterable of cut-offs, forwarded to ``cal_rank_by_sim_mat``.
        csls: forwarded to ``sim_handler``; also printed when greater than 0.
        accurate: forwarded to ``cal_rank_by_sim_mat``; selects the long
            report line (with mr / mrr) instead of the short one.

    Returns:
        ``(top_k, acc, mean_rank, mrr, seconds)`` where ``acc`` holds the
        hits@k percentages rounded to two decimals.

    Raises:
        AssertionError: if the union of the returned ``prec_set`` values does
            not contain exactly one entry per row.
    """
    t = time.time()
    sim_mat = sim_handler(embed1, embed2, csls)
    n, m = sim_mat.shape[0], sim_mat.shape[1]
    row_sum = np.sum(sim_mat, axis=1)
    col_sum = np.sum(sim_mat, axis=0)

    # Same per-cell formula as the former Python loop over every (i, j),
    # evaluated for the whole matrix at once via broadcasting.
    mu = (row_sum[:, np.newaxis] + col_sum[np.newaxis, :] - sim_mat) / (n + m - 1)
    # The result is stored as float64, as the np.zeros buffer used before was.
    mcd = np.square(sim_mat - mu).astype(np.float64, copy=False)
    del mu

    print("********************mcd*********************")
    print(mcd)

    ref_num = mcd.shape[0]
    t_num = [0 for _ in top_k]
    t_mean = 0
    t_mrr = 0
    t_prec_set = set()

    tasks = div_list(np.array(range(ref_num)), P.nums_threads)
    pool = multiprocessing.Pool(processes=len(tasks))
    reses = list()
    for task in tasks:
        reses.append(
            pool.apply_async(cal_rank_by_sim_mat, (task, mcd[task, :], top_k, accurate)))
    pool.close()
    pool.join()

    for res in reses:
        mean, mrr, num, prec_set = res.get()
        t_mean += mean
        t_mrr += mrr
        # list += ndarray is dispatched to NumPy and yields an element-wise sum.
        t_num += np.array(num)
        t_prec_set |= prec_set
    assert len(t_prec_set) == ref_num

    acc = np.array(t_num) / ref_num * 100
    for i, value in enumerate(acc):
        acc[i] = round(value, 2)
    t_mean /= ref_num
    t_mrr /= ref_num

    if csls > 0:
        print("csls = {}".format(csls))
    if accurate:
        print(
            "accurate results: hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
            .format(top_k, acc, t_mean, t_mrr, time.time() - t))
    else:
        print("hits@{} = {}, time = {:.3f} s ".format(top_k, acc, time.time() - t))

    del sim_mat
    del mcd
    gc.collect()
    return top_k, acc, t_mean, t_mrr, time.time() - t