コード例 #1
0
ファイル: kdcoe.py プロジェクト: yjydfnhc/OpenEA
 def find_new_alignment_rel(self):
     """Search for new entity alignments using the relational embeddings.

     The validation and test entities of both graphs are looked up in
     ``self.ent_embeds``; the first side is multiplied by
     ``self.mapping_mat`` before the similarity matrix is built. Candidate
     index pairs come from ``find_potential_alignment_greedily`` with
     threshold ``self.sim_th`` and are merged into
     ``self.new_alignment_index``. ``self.new_alignment`` is then rebuilt
     as entity pairs from the accumulated index pairs.

     Returns:
         ``False`` when no candidate pair was found at all. Otherwise a
         stop flag: ``True`` when the accumulated index set was already
         non-empty and this round added nothing to it, ``False`` otherwise.
     """
     started = time.time()
     candidates1 = self.kgs.valid_entities1 + self.kgs.test_entities1
     candidates2 = self.kgs.valid_entities2 + self.kgs.test_entities2

     lookup1 = tf.nn.embedding_lookup(self.ent_embeds, candidates1)
     src_vecs = lookup1.eval(session=self.session)
     lookup2 = tf.nn.embedding_lookup(self.ent_embeds, candidates2)
     tgt_vecs = lookup2.eval(session=self.session)

     # Project the first side with the learned mapping before comparing.
     projection = self.mapping_mat.eval(session=self.session)
     src_vecs = np.matmul(src_vecs, projection)
     similarity = sim(src_vecs, tgt_vecs, normalize=True)

     print("find new alignment based on relational embeddings:")
     found_pairs = find_potential_alignment_greedily(similarity, self.sim_th)
     check_new_alignment(found_pairs)
     if found_pairs is None or len(found_pairs) == 0:
         return False

     fresh = set(found_pairs)
     if len(self.new_alignment_index) == 0:
         # First successful round: start the accumulated set from scratch.
         self.new_alignment_index = fresh
         stop = False
     elif fresh <= self.new_alignment_index:
         # Every pair is already known, so signal the caller to stop.
         stop = True
     else:
         self.new_alignment_index |= fresh
         stop = False

     check_new_alignment(self.new_alignment_index, context='check total new alignment')
     # Index pairs are positions in the candidate lists; map them to entities.
     self.new_alignment = [(candidates1[a], candidates2[b])
                           for (a, b) in self.new_alignment_index]
     print('finding new alignment costs time: {:.4f}s'.format(time.time() - started))
     return stop
コード例 #2
0
def update_labeled_alignment_y(labeled_alignment, sim_mat, len_valid_test):
    """Make the alignment one-to-one on the target (second) side.

    Pairs are grouped by their second element ``j``. When several ``i`` are
    paired with the same ``j``, only the ``i`` with the largest
    ``sim_mat[i, j]`` is kept; on ties the first candidate met while
    iterating the group wins.

    Args:
        labeled_alignment: iterable of ``(i, j)`` index pairs.
        sim_mat: similarity lookup indexed as ``sim_mat[i, j]``.
        len_valid_test: forwarded unchanged to ``check_new_alignment``.

    Returns:
        A set of ``(i, j)`` pairs in which every ``j`` occurs exactly once
        and every ``i`` is one of the candidates originally paired with it.
    """
    sources_by_target = dict()
    for i, j in labeled_alignment:
        # Collect every i that claims the same j.
        sources_by_target.setdefault(j, set()).add(i)

    updated_alignment = set()
    for j, i_set in sources_by_target.items():
        if len(i_set) == 1:
            # Unique match: keep it without consulting sim_mat.
            (only_i,) = i_set
            updated_alignment.add((only_i, j))
            continue
        # Seed the search from the candidates themselves rather than from a
        # fixed floor (previously -10 with placeholder index -1). With a
        # floor, a group whose scores all sit at or below it would emit
        # (-1, j), and -1 is not one of the candidates.
        best_i = None
        best_sim = None
        for i in i_set:
            cur_sim = sim_mat[i, j]
            # `best_sim != best_sim` is True only for NaN, so a NaN seed is
            # displaced by the next candidate instead of blocking the search.
            if best_i is None or cur_sim > best_sim or best_sim != best_sim:
                best_i, best_sim = i, cur_sim
        updated_alignment.add((best_i, j))
    check_new_alignment(updated_alignment,
                        len_valid_test,
                        context="after editing (->)")
    return updated_alignment
コード例 #3
0
def update_labeled_alignment_x(pre_labeled_alignment, curr_labeled_alignment,
                               sim_mat, len_valid_test):
    """Merge the current alignment into the previous one, keyed by source.

    For each ``(i, j)`` in ``curr_labeled_alignment``: if ``i`` had no
    earlier match, ``j`` is recorded; otherwise ``j`` replaces the earlier
    match only when ``sim_mat[i, j]`` is at least ``sim_mat[i, earlier_j]``.

    Two diagnostic counters are printed. Both treat ``i == j`` as the
    correct match and only consider ``i < len_valid_test``:
    "greedy update wrongly" counts proposals that differ from a previously
    correct match; "update wrongly" counts those that actually replaced it.

    Args:
        pre_labeled_alignment: earlier ``(i, j)`` pairs (anything ``dict()``
            accepts as key/value pairs).
        curr_labeled_alignment: iterable of newly proposed ``(i, j)`` pairs.
        sim_mat: similarity lookup indexed as ``sim_mat[i, j]``.
        len_valid_test: upper bound on ``i`` for the diagnostic counters;
            also forwarded to ``check_new_alignment``.

    Returns:
        The merged alignment as a set of ``(i, j)`` pairs.
    """
    merged = dict(pre_labeled_alignment)
    replaced_correct = 0
    proposed_against_correct = 0
    for src, tgt in curr_labeled_alignment:
        # The earlier match was (src, src) and the new proposal differs.
        if merged.get(src, -1) == src and tgt != src and src < len_valid_test:
            proposed_against_correct += 1

        if src not in merged:
            merged[src] = tgt
            continue

        # Compare the earlier match with the new proposal by similarity.
        old_tgt = merged[src]
        old_sim = sim_mat[src, old_tgt]
        new_sim = sim_mat[src, tgt]
        if new_sim >= old_sim:
            # A correct earlier match is being replaced by a different one.
            if old_tgt == src and tgt != src and src < len_valid_test:
                replaced_correct += 1
            merged[src] = tgt

    print("update wrongly: ", replaced_correct, "greedy update wrongly: ", proposed_against_correct)
    pre_labeled_alignment = set(merged.items())
    check_new_alignment(pre_labeled_alignment,
                        len_valid_test,
                        context="after editing (<-)")
    return pre_labeled_alignment
コード例 #4
0
ファイル: kdcoe.py プロジェクト: yjydfnhc/OpenEA
    def find_new_alignment_desc(self):
        """Search for new entity alignments using the description embeddings.

        Descriptions of the validation and test links are looked up in
        ``self.e_desc``, indexed into ``self.word_em`` and fed through the
        session to obtain both sides' description embeddings. The rows of
        the first side are split into chunks by ``task_divide`` and each
        chunk is passed to ``find_alignment`` in a worker process with
        threshold ``self.desc_sim_th``. The resulting index pairs are merged
        into ``self.new_alignment_index`` and ``self.new_alignment`` is
        rebuilt from them.

        Returns:
            ``False`` when no candidate pair was found at all. Otherwise a
            stop flag: ``True`` when the accumulated index set was already
            non-empty and this round added nothing to it, ``False``
            otherwise.
        """
        started = time.time()
        print("sim th", self.desc_sim_th, self.sim_th)
        print("find new alignment based on description embeddings:")

        all_links = self.kgs.valid_links + self.kgs.test_links
        link_ids = pd.DataFrame(all_links).values
        candidates1 = self.kgs.valid_entities1 + self.kgs.test_entities1
        candidates2 = self.kgs.valid_entities2 + self.kgs.test_entities2

        # Column 0 / column 1 hold the two sides of each link.
        desc_left = self.e_desc.loc[link_ids[:, 0]].values
        desc_right = self.e_desc.loc[link_ids[:, 1]].values
        feed = {self.desc1: self.word_em[desc_left],
                self.desc2: self.word_em[desc_right]}
        dem1, dem2 = self.session.run(
            fetches=[self.desc_embedding1, self.desc_embedding2],
            feed_dict=feed)

        row_ids = np.array(range(len(all_links)))
        search_tasks = task_divide(row_ids, self.args.test_threads_num // 2)

        # One worker per chunk. A single-process variant would call
        # find_alignment(dem1[chunk, :], dem2, chunk, self.desc_sim_th)
        # directly for each chunk.
        pool = mp.Pool(processes=len(search_tasks))
        pending = [
            pool.apply_async(find_alignment,
                             (dem1[chunk, :], dem2, chunk, self.desc_sim_th))
            for chunk in search_tasks
        ]
        pool.close()
        pool.join()

        found_pairs = set()
        for job in pending:
            found_pairs.update(job.get())

        if not found_pairs:
            return False
        check_new_alignment(found_pairs)

        if len(self.new_alignment_index) == 0:
            # First successful round: start the accumulated set from scratch.
            self.new_alignment_index = found_pairs
            stop = False
        elif found_pairs <= self.new_alignment_index:
            # Every pair is already known, so signal the caller to stop.
            stop = True
        else:
            self.new_alignment_index |= found_pairs
            stop = False

        check_new_alignment(self.new_alignment_index, context='check total new alignment')
        # Index pairs are positions in the candidate lists; map them to entities.
        self.new_alignment = [(candidates1[a], candidates2[b])
                              for (a, b) in self.new_alignment_index]
        print('finding new alignment costs time: {:.4f}s'.format(time.time() - started))
        return stop
コード例 #5
0
def update_labeled_alignment_x(pre_labeled_alignment, curr_labeled_alignment, sim_mat):
    """Merge the current alignment into the previous one, keyed by source.

    For each ``(i, j)`` in ``curr_labeled_alignment``: if ``i`` had no
    earlier match, ``j`` is recorded; otherwise ``j`` replaces the earlier
    match only when ``sim_mat[i, j]`` is at least ``sim_mat[i, earlier_j]``.

    Two diagnostic counters are printed; both treat ``i == j`` as the
    correct match. "greedy update wrongly" counts proposals that differ
    from a previously correct match; "update wrongly" counts those that
    actually replaced it.

    Args:
        pre_labeled_alignment: earlier ``(i, j)`` pairs (anything ``dict()``
            accepts as key/value pairs).
        curr_labeled_alignment: iterable of newly proposed ``(i, j)`` pairs.
        sim_mat: similarity lookup indexed as ``sim_mat[i, j]``.

    Returns:
        The merged alignment as a set of ``(i, j)`` pairs.
    """
    merged = dict(pre_labeled_alignment)
    replaced_correct = 0
    proposed_against_correct = 0
    for src, tgt in curr_labeled_alignment:
        # The earlier match was (src, src) and the new proposal differs.
        if merged.get(src, -1) == src and tgt != src:
            proposed_against_correct += 1

        if src not in merged:
            merged[src] = tgt
            continue

        old_tgt = merged[src]
        old_sim = sim_mat[src, old_tgt]
        new_sim = sim_mat[src, tgt]
        if new_sim >= old_sim:
            # A correct earlier match is being replaced by a different one.
            if old_tgt == src and tgt != src:
                replaced_correct += 1
            merged[src] = tgt

    print("update wrongly: ", replaced_correct, "greedy update wrongly: ", proposed_against_correct)
    pre_labeled_alignment = set(merged.items())
    check_new_alignment(pre_labeled_alignment, context="after editing (<-)")
    return pre_labeled_alignment
コード例 #6
0
def update_labeled_alignment_y(labeled_alignment, sim_mat):
    """Make the alignment one-to-one on the target (second) side.

    Pairs are grouped by their second element ``j``. When several ``i`` are
    paired with the same ``j``, only the ``i`` with the largest
    ``sim_mat[i, j]`` is kept; on ties the first candidate met while
    iterating the group wins.

    Args:
        labeled_alignment: iterable of ``(i, j)`` index pairs.
        sim_mat: similarity lookup indexed as ``sim_mat[i, j]``.

    Returns:
        A set of ``(i, j)`` pairs in which every ``j`` occurs exactly once
        and every ``i`` is one of the candidates originally paired with it.
    """
    sources_by_target = dict()
    for i, j in labeled_alignment:
        sources_by_target.setdefault(j, set()).add(i)

    updated_alignment = set()
    for j, i_set in sources_by_target.items():
        if len(i_set) == 1:
            # Unique match: keep it without consulting sim_mat.
            (only_i,) = i_set
            updated_alignment.add((only_i, j))
            continue
        # Seed the search from the candidates themselves rather than from a
        # fixed floor (previously -10 with placeholder index -1). With a
        # floor, a group whose scores all sit at or below it would emit
        # (-1, j), and -1 is not one of the candidates.
        best_i = None
        best_sim = None
        for i in i_set:
            cur_sim = sim_mat[i, j]
            # `best_sim != best_sim` is True only for NaN, so a NaN seed is
            # displaced by the next candidate instead of blocking the search.
            if best_i is None or cur_sim > best_sim or best_sim != best_sim:
                best_i, best_sim = i, cur_sim
        updated_alignment.add((best_i, j))
    check_new_alignment(updated_alignment, context="after editing (->)")
    return updated_alignment