Beispiel #1
0
def eval_alignment_mul(sim_mat, top_k, mess=""):
    """Rank the rows of ``sim_mat`` in parallel and print the aggregated metrics.

    The row indices are split with ``div_list`` into ``P.nums_threads`` chunks
    and each chunk is handed to ``cal_rank`` in its own worker process. Every
    worker returns a ``(mean, mrr, num)`` triple; the triples are summed over
    all chunks and divided by the number of rows before being printed.

    Args:
        sim_mat: 2-D array; only ``shape[0]`` and row slicing are used here.
        top_k: iterable of cut-offs, forwarded unchanged to ``cal_rank``.
        mess: free-form prefix for the printed report line.

    Returns:
        None. The result is only printed.
    """
    started = time.time()
    ref_num = sim_mat.shape[0]

    # Starts out as a plain list; the first ``+=`` with an ndarray dispatches
    # to NumPy's element-wise add, so afterwards this is an ndarray of sums.
    hit_counts = [0 for _ in top_k]
    rank_total = 0
    rr_total = 0

    row_chunks = div_list(np.array(range(ref_num)), P.nums_threads)
    pool = multiprocessing.Pool(processes=len(row_chunks))
    pending = [
        pool.apply_async(cal_rank, (chunk, sim_mat[chunk, :], top_k))
        for chunk in row_chunks
    ]
    pool.close()
    pool.join()

    for job in pending:
        mean, mrr, num = job.get()
        rank_total += mean
        rr_total += mrr
        hit_counts += np.array(num)

    acc = np.array(hit_counts) / ref_num
    acc[:] = [round(value, 4) for value in acc]
    rank_total /= ref_num
    rr_total /= ref_num

    elapsed = time.time() - started
    report = "{}, hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
    print(report.format(mess, top_k, acc, rank_total, rr_total, elapsed))
Beispiel #2
0
def generate_neighbours_multi_embed(embed, ent_list, k):
    """Compute neighbours for every entity in ``ent_list`` using worker processes.

    ``ent_list`` and the matching row positions ``0..len(ent_list)-1`` are both
    split with ``ut.div_list`` into ``P.nums_threads`` fragments. Each fragment
    is passed to ``cal_neighbours_embed`` together with the full entity array,
    the fragment's rows of ``embed``, the whole ``embed`` matrix and ``k``.
    The per-fragment results are combined with ``ut.merge_dic``.

    Args:
        embed: 2-D embedding array, indexed by position in ``ent_list``.
        ent_list: sequence of entity ids.
        k: forwarded unchanged to ``cal_neighbours_embed``.

    Returns:
        The merged result of all workers (whatever ``ut.merge_dic`` yields).
    """
    all_ents = np.array(ent_list)
    ent_frags = ut.div_list(all_ents, P.nums_threads)
    row_frags = ut.div_list(np.array(range(len(ent_list))), P.nums_threads)

    pool = multiprocessing.Pool(processes=len(ent_frags))
    pending = [
        pool.apply_async(
            cal_neighbours_embed,
            (frag, all_ents, embed[row_frags[pos], :], embed, k))
        for pos, frag in enumerate(ent_frags)
    ]
    pool.close()
    pool.join()

    merged = dict()
    for job in pending:
        merged = ut.merge_dic(merged, job.get())

    # Drop the local reference to the embedding matrix before forcing a GC pass.
    del embed
    gc.collect()
    return merged
Beispiel #3
0
def train_tris_1epo(model, triples1, triples2, nbours1, nbours2, nums_neg, batch_size, nums_threads_batch):
    """Train the triple loss for one epoch and return ``(mean_loss, seconds)``.

    Batches are produced by ``nums_threads_batch`` background processes running
    ``generate_batch_via_neighbour_no_pair_queue``; each process receives its
    own share of the step indices and pushes ``(batch_pos, batch_neg)`` pairs
    onto a shared queue. The main process pops one pair per training step and
    feeds it to ``model.session``.

    Args:
        model: object exposing ``triple_loss``, ``triple_optimizer``,
            ``session`` and the ``pos_*`` / ``neg_*`` placeholders.
        triples1, triples2: objects exposing ``triples_num`` and ``triple_list``.
        nbours1, nbours2: forwarded unchanged to the batch generator.
        nums_neg: forwarded to the batch generator as ``multi``.
        batch_size: number of triples per step.
        nums_threads_batch: number of producer processes.

    Returns:
        Tuple of the loss averaged over all steps and the elapsed wall time
        rounded to two decimals.
    """
    loss_sum = 0
    epoch_start = time.time()

    total_triples = triples1.triples_num + triples2.triples_num
    triple_steps = int(math.ceil(total_triples / batch_size))
    step_groups = ut.div_list(list(range(triple_steps)), nums_threads_batch)
    assert len(step_groups) == nums_threads_batch

    batch_queue = mp.Queue()
    for group in step_groups:
        producer = mp.Process(
            target=generate_batch_via_neighbour_no_pair_queue,
            kwargs=dict(que=batch_queue,
                        triples1=triples1,
                        triples2=triples2,
                        steps=group,
                        batch_size=batch_size,
                        nbours1=nbours1,
                        nbours2=nbours2,
                        multi=nums_neg))
        producer.start()

    def column(rows, idx):
        # Pull one component (head / relation / tail) out of every triple.
        return [row[idx] for row in rows]

    for _ in range(triple_steps):
        fetches = {"loss": model.triple_loss, "train_op": model.triple_optimizer}
        batch_pos, batch_neg = batch_queue.get()
        triple_feed_dict = {
            model.pos_hs: column(batch_pos, 0),
            model.pos_rs: column(batch_pos, 1),
            model.pos_ts: column(batch_pos, 2),
            model.neg_hs: column(batch_neg, 0),
            model.neg_rs: column(batch_neg, 1),
            model.neg_ts: column(batch_neg, 2),
        }
        vals = model.session.run(fetches=fetches, feed_dict=triple_feed_dict)
        loss_sum += vals["loss"]
    loss_sum /= triple_steps

    # Reshuffle both triple lists in place once the epoch is finished.
    for triples in (triples1, triples2):
        random.shuffle(triples.triple_list)

    elapsed = time.time() - epoch_start
    return loss_sum, round(elapsed, 2)
Beispiel #4
0
def generate_neighbours_multi_embed(embed, ent_list, k, nums_threads):
    """Compute neighbours for every entity in ``ent_list`` using worker processes.

    ``ent_list`` and the matching row positions ``0..len(ent_list)-1`` are both
    split with ``ut.div_list`` into ``nums_threads`` fragments. Each fragment is
    passed to ``cal_neighbours_embed`` together with the full entity array, the
    fragment's rows of ``embed``, the whole ``embed`` matrix and ``k``. The
    per-fragment results are combined with ``ut.merge_dic``.

    Args:
        embed: 2-D embedding array, indexed by position in ``ent_list``.
        ent_list: sequence of entity ids.
        k: forwarded unchanged to ``cal_neighbours_embed``.
        nums_threads: number of fragments requested from ``ut.div_list``; one
            worker process is started per fragment returned.

    Returns:
        The merged result of all workers (whatever ``ut.merge_dic`` yields).
    """
    # Built once and shared by every task instead of being rebuilt per fragment.
    all_ents = np.array(ent_list)
    ent_frags = ut.div_list(all_ents, nums_threads)
    ent_frag_indexes = ut.div_list(np.array(range(len(ent_list))), nums_threads)

    # The context manager terminates the workers if submitting or joining
    # raises, so a failure does not leave orphaned processes behind.
    with multiprocessing.Pool(processes=len(ent_frags)) as pool:
        results = [
            pool.apply_async(
                cal_neighbours_embed,
                (frag, all_ents, embed[ent_frag_indexes[i], :], embed, k))
            for i, frag in enumerate(ent_frags)
        ]
        pool.close()
        pool.join()

    dic = dict()
    for res in results:
        dic = ut.merge_dic(dic, res.get())

    # Drop the local reference to the embedding matrix before forcing a GC pass.
    del embed
    gc.collect()
    return dic
def eval_alignment_by_sim_mat(embed1, embed2, top_k, csls=0, accurate=False):
    """Evaluate alignment from a similarity matrix and print hits@k (and MR/MRR).

    The similarity matrix comes from ``sim_handler(embed1, embed2, csls)``. Its
    row indices are split with ``div_list`` into ``P.nums_threads`` chunks and
    each chunk is ranked by ``cal_rank_by_sim_mat`` in a separate process. The
    per-chunk ``(mean, mrr, num, prec_set)`` results are summed / unioned and
    normalised by the number of rows.

    Args:
        embed1, embed2: inputs forwarded to ``sim_handler``.
        top_k: iterable of cut-offs, forwarded to the workers.
        csls: forwarded to ``sim_handler``; echoed in the output when > 0.
        accurate: forwarded to the workers; also selects the long report line.

    Returns:
        ``(top_k, acc, mean_rank, mrr, elapsed_seconds)`` where ``acc`` holds
        the hit percentages rounded to two decimals.

    Raises:
        AssertionError: if the union of the workers' ``prec_set`` results does
            not contain exactly one entry per row.
    """
    began = time.time()
    sim_mat = sim_handler(embed1, embed2, csls)
    print("*******************sim_mat*****************")

    ref_num = sim_mat.shape[0]
    # Starts out as a plain list; the first ``+=`` with an ndarray dispatches
    # to NumPy's element-wise add, so afterwards this is an ndarray of sums.
    hit_counts = [0 for _ in top_k]
    rank_total = 0
    rr_total = 0
    covered = set()

    row_chunks = div_list(np.array(range(ref_num)), P.nums_threads)
    pool = multiprocessing.Pool(processes=len(row_chunks))
    pending = [
        pool.apply_async(cal_rank_by_sim_mat,
                         (chunk, sim_mat[chunk, :], top_k, accurate))
        for chunk in row_chunks
    ]
    pool.close()
    pool.join()

    for job in pending:
        mean, mrr, num, prec_set = job.get()
        rank_total += mean
        rr_total += mrr
        hit_counts += np.array(num)
        covered |= prec_set
    assert len(covered) == ref_num

    acc = np.array(hit_counts) / ref_num * 100
    acc[:] = [round(value, 2) for value in acc]
    rank_total /= ref_num
    rr_total /= ref_num

    if csls > 0:
        print("csls = {}".format(csls))
    if accurate:
        long_line = "accurate results: hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
        print(long_line.format(top_k, acc, rank_total, rr_total,
                               time.time() - began))
    else:
        short_line = "hits@{} = {}, time = {:.3f} s "
        print(short_line.format(top_k, acc, time.time() - began))

    # Not used afterwards; the read is retained so that an empty ``top_k``
    # still fails here exactly as it did before.
    _top1 = acc[0]

    del sim_mat
    gc.collect()
    return top_k, acc, rank_total, rr_total, time.time() - began
Beispiel #6
0
def CSLS_sim(sim_mat1, k, nums_threads):
    """Compute one CSLS value per row of ``sim_mat1`` using worker processes.

    Row indices are split with ``div_list`` into ``nums_threads`` chunks; each
    chunk's rows are passed to ``cal_csls_sim`` with ``k``. The per-chunk
    results are joined in chunk order with ``np.append``.

    Args:
        sim_mat1: 2-D similarity matrix.
        k: forwarded unchanged to ``cal_csls_sim`` (presumably the number of
            nearest neighbours averaged per row - confirm in ``cal_csls_sim``).
        nums_threads: number of chunks requested from ``div_list``.

    Returns:
        Array with one entry per row of ``sim_mat1``.

    Raises:
        AssertionError: if the joined result does not have one entry per row.
    """
    row_chunks = div_list(np.array(range(sim_mat1.shape[0])), nums_threads)
    pool = multiprocessing.Pool(processes=len(row_chunks))
    pending = [
        pool.apply_async(cal_csls_sim, (sim_mat1[chunk, :], k))
        for chunk in row_chunks
    ]
    pool.close()
    pool.join()

    parts = [job.get() for job in pending]
    # The first part is taken as-is; later parts are appended one by one.
    sim_values = parts[0] if parts else None
    for part in parts[1:]:
        sim_values = np.append(sim_values, part)

    assert sim_values.shape[0] == sim_mat1.shape[0]
    return sim_values
Beispiel #7
0
from utils import div_list
import tensorflow as tf
import numpy as np
from train import Training

if __name__ == "__main__":
    # Script entry point: shuffle the sample indices, split them into cv_num
    # folds and call gcn.train once per fold with (train indices, test indices).

    # Build the trainer; the same instance is reused for every fold below.
    gcn = Training()

    # Seed NumPy and TensorFlow so the shuffles below are repeatable.
    # NOTE(review): the seeds are set after Training() has been constructed -
    # confirm that nothing random happens inside its constructor.
    seed = 123
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    # Only the number of rows of the loaded file is used here: it defines how
    # many sample indices there are to permute.
    labels = np.loadtxt("data/adj.txt")
    reorder = np.arange(labels.shape[0])
    np.random.shuffle(reorder)

    # Number of cross-validation folds.
    cv_num = 5

    # div_list comes from utils; presumably it splits the index list into
    # cv_num parts - verify against its definition.
    order = div_list(reorder.tolist(), cv_num)
    for i in range(cv_num):
        print("cross_validation:", '%01d' % (i))
        # Fold i is held out for testing ...
        test_arr = order[i]
        # ... and every remaining index, reshuffled, is used for training.
        arr = list(set(reorder).difference(set(test_arr)))
        np.random.shuffle(arr)
        train_arr = arr
        # The returned scores are not used further in the visible script.
        scores = gcn.train(train_arr, test_arr)
Beispiel #8
0
def eval_alignment_by_div_embed(embed1,
                                embed2,
                                top_k,
                                nums_threads,
                                selected_pairs=None,
                                accurate=False,
                                is_euclidean=False):
    """Evaluate alignment by ranking fragments of ``embed1`` against ``embed2``.

    The row indices of ``embed1`` are split with ``div_list`` into
    ``nums_threads`` fragments and each fragment is ranked by
    ``cal_rank_by_div_embed`` in a separate process. Every worker returns two
    metric triples - ``(mean, mrr, num)`` and ``(mean1, mrr1, num1)`` - plus a
    ``prec_set``; the triples are summed, the sets are unioned, and both metric
    groups are normalised by the number of rows of ``embed1``.

    The first metric group is always printed. When ``selected_pairs`` is
    non-empty, the second group is printed as well and supplies the returned
    hits@1 value.

    Args:
        embed1: 2-D array; its rows are the entities being ranked.
        embed2: forwarded unchanged to every worker.
        top_k: iterable of cut-offs, forwarded to the workers.
        nums_threads: number of fragments requested from ``div_list``.
        selected_pairs: optional iterable of ``(i, j)`` pairs with unique ``i``;
            converted to a dict and forwarded to the workers.
        accurate: forwarded to the workers; also selects the long report line.
        is_euclidean: forwarded unchanged to the workers.

    Returns:
        ``(prec_set, hits1)`` - the union of the workers' ``prec_set`` results
        and the first hit percentage (from the second metric group when
        ``selected_pairs`` is non-empty, otherwise from the first).

    Raises:
        AssertionError: if ``selected_pairs`` contains a repeated first element,
            or if the unioned ``prec_set`` does not have one entry per row.
    """
    def pair2dic(pairs):
        # Map each first element to its partner; None when nothing is selected.
        if pairs is None or len(pairs) == 0:
            return None
        dic = dict()
        for i, j in pairs:
            if i not in dic:
                dic[i] = j
        # A shorter dict means some first element occurred more than once.
        assert len(dic) == len(pairs)
        return dic

    def report(accs, mean_rank, mean_rr):
        # Print one metrics line in the format selected by ``accurate``.
        if accurate:
            print(
                "accurate results: hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
                .format(top_k, accs, mean_rank, mean_rr,
                        time.time() - t))
        else:
            print("hits@{} = {}, time = {:.3f} s ".format(top_k, accs,
                                                          time.time() - t))

    t = time.time()
    dic = pair2dic(selected_pairs)
    ref_num = embed1.shape[0]
    t_num = np.array([0 for _ in top_k])
    t_mean = 0
    t_mrr = 0
    t_num1 = np.array([0 for _ in top_k])
    t_mean1 = 0
    t_mrr1 = 0
    t_prec_set = set()

    frags = div_list(np.array(range(ref_num)), nums_threads)
    # The context manager terminates the workers if submitting or joining
    # raises, so a failure does not leave orphaned processes behind.
    with multiprocessing.Pool(processes=len(frags)) as pool:
        reses = [
            pool.apply_async(cal_rank_by_div_embed,
                             (frag, dic, embed1[frag, :], embed2, top_k,
                              accurate, is_euclidean))
            for frag in frags
        ]
        pool.close()
        pool.join()

    for res in reses:
        mean, mrr, num, mean1, mrr1, num1, prec_set = res.get()
        t_mean += mean
        t_mrr += mrr
        t_num += num
        t_mean1 += mean1
        t_mrr1 += mrr1
        t_num1 += num1
        t_prec_set |= prec_set

    assert len(t_prec_set) == ref_num

    acc = np.round(t_num / ref_num * 100, 2)
    t_mean /= ref_num
    t_mrr /= ref_num
    report(acc, t_mean, t_mrr)
    hits1 = acc[0]

    if selected_pairs is not None and len(selected_pairs) > 0:
        # NOTE(review): the second metric group is normalised by ref_num, not
        # by len(selected_pairs) - kept as in the original; confirm intent.
        acc1 = np.round(t_num1 / ref_num * 100, 2)
        t_mean1 /= ref_num
        t_mrr1 /= ref_num
        hits1 = acc1[0]
        # Report the second metric group. The previous code re-printed
        # acc / t_mean / t_mrr here, so these values were never shown.
        report(acc1, t_mean1, t_mrr1)

    gc.collect()
    return t_prec_set, hits1
def eval_alignment_by_mcd_sim_mat(embed1,
                                  embed2,
                                  top_k,
                                  csls=0,
                                  accurate=False):
    """Evaluate alignment on a deviation matrix derived from the similarities.

    The similarity matrix ``S`` comes from ``sim_handler(embed1, embed2, csls)``.
    For every cell the mean of its row and column (the cell itself counted
    once) is computed and the squared deviation from that mean is stored::

        mu[i, j]  = (sum(S[i, :]) + sum(S[:, j]) - S[i, j]) / (n + m - 1)
        mcd[i, j] = (S[i, j] - mu[i, j]) ** 2

    The rows of ``mcd`` are then split with ``div_list`` into ``P.nums_threads``
    chunks and ranked by ``cal_rank_by_sim_mat`` in separate processes; the
    per-chunk ``(mean, mrr, num, prec_set)`` results are summed / unioned and
    normalised by the number of rows.

    Args:
        embed1, embed2: inputs forwarded to ``sim_handler``.
        top_k: iterable of cut-offs, forwarded to the workers.
        csls: forwarded to ``sim_handler``; echoed in the output when > 0.
        accurate: forwarded to the workers; also selects the long report line.

    Returns:
        ``(top_k, acc, mean_rank, mrr, elapsed_seconds)`` where ``acc`` holds
        the hit percentages rounded to two decimals.

    Raises:
        AssertionError: if the union of the workers' ``prec_set`` results does
            not contain exactly one entry per row.
    """
    t = time.time()
    sim_mat = np.asarray(sim_handler(embed1, embed2, csls))
    n, m = sim_mat.shape[0], sim_mat.shape[1]

    # Broadcast the (n, 1) row sums against the (1, m) column sums so the whole
    # matrix is computed in one NumPy expression instead of n * m Python-level
    # iterations. The result is float64, like the np.zeros buffer used before;
    # values may differ from the scalar loop in the last floating-point digits.
    row_sum = np.sum(sim_mat, axis=1, keepdims=True)
    col_sum = np.sum(sim_mat, axis=0, keepdims=True)
    mu = (row_sum + col_sum - sim_mat).astype(np.float64, copy=False) / (n + m - 1)
    mcd = np.square(sim_mat - mu)
    del mu
    print("********************mcd*********************")
    print(mcd)

    ref_num = mcd.shape[0]
    # Starts out as a plain list; the first ``+=`` with an ndarray dispatches
    # to NumPy's element-wise add, so afterwards this is an ndarray of sums.
    t_num = [0 for _ in top_k]
    t_mean = 0
    t_mrr = 0
    t_prec_set = set()

    tasks = div_list(np.array(range(ref_num)), P.nums_threads)
    # The context manager terminates the workers if submitting or joining
    # raises, so a failure does not leave orphaned processes behind.
    with multiprocessing.Pool(processes=len(tasks)) as pool:
        reses = [
            pool.apply_async(cal_rank_by_sim_mat,
                             (task, mcd[task, :], top_k, accurate))
            for task in tasks
        ]
        pool.close()
        pool.join()

    for res in reses:
        mean, mrr, num, prec_set = res.get()
        t_mean += mean
        t_mrr += mrr
        t_num += np.array(num)
        t_prec_set |= prec_set
    assert len(t_prec_set) == ref_num

    acc = np.round(np.array(t_num) / ref_num * 100, 2)
    t_mean /= ref_num
    t_mrr /= ref_num

    if csls > 0:
        print("csls = {}".format(csls))
    if accurate:
        print(
            "accurate results: hits@{} = {}, mr = {:.3f}, mrr = {:.3f}, time = {:.3f} s "
            .format(top_k, acc, t_mean, t_mrr,
                    time.time() - t))
    else:
        print("hits@{} = {}, time = {:.3f} s ".format(top_k, acc,
                                                      time.time() - t))

    # Release the two large matrices before returning.
    del sim_mat
    del mcd
    gc.collect()
    return top_k, acc, t_mean, t_mrr, time.time() - t