Example 1
0
    def spo_to_seq(self,
                   text: str,
                   spo_list: List[Dict[str, str]],
                   s_fst: bool = True) -> Dict[int, List[int]]:
        """Encode triplets as a mapping from a head entity's last-token index
        to a flat list of ``[relation_id, tail_last_token_index, ...]`` pairs.

        Positions are token indices (index of the *last* token of each
        entity span within the tokenized ``text``). With ``s_fst=True``
        (default) the subject is the head; with ``s_fst=False`` the roles
        are swapped so the object becomes the dict key.
        """
        dic: Dict[int, List[int]] = {}
        tokens = self.hyper.tokenizer(text)

        for triplet in spo_list:
            obj_tokens = self.hyper.tokenizer(triplet["object"])
            subj_tokens = self.hyper.tokenizer(triplet["subject"])

            # Token-based positions: index of the entity's final token.
            object_pos = find(tokens, obj_tokens) + len(obj_tokens) - 1
            subject_pos = find(tokens, subj_tokens) + len(subj_tokens) - 1
            relation_pos = self.relation_vocab[triplet["predicate"]]
            # BUG FIX: a leftover char-based line
            # `object_pos = text.find(object) + len(object) - 1`
            # used to clobber the token-based object_pos here; `object` is
            # tokenizer output, so str.find on it raises TypeError. Removed.

            if not s_fst:
                # Swap roles: treat the object as the head entity (ops order).
                object_pos, subject_pos = subject_pos, object_pos

            dic.setdefault(subject_pos, []).extend([relation_pos, object_pos])
        return dic
Example 2
0
    def spo_to_selection(
            self, text: str,
            spo_list: List[Dict[str, str]]) -> List[Dict[str, int]]:
        """Convert each (subject, predicate, object) triplet into token-index
        form: the last-token position of subject and object within the
        tokenized ``text``, plus the relation's vocabulary id.
        """
        tokens = self.hyper.tokenizer(text)

        def last_token_index(surface: str) -> int:
            # Index of the span's final token inside `tokens`.
            piece = self.hyper.tokenizer(surface)
            return find(tokens, piece) + len(piece) - 1

        return [{
            "subject": last_token_index(triplet["subject"]),
            "predicate": self.relation_vocab[triplet["predicate"]],
            "object": last_token_index(triplet["object"]),
        } for triplet in spo_list]
Example 3
0
    def spo_to_seq(self, text: str,
                   spo_list: List[Dict[str, str]]) -> List[int]:
        """Flatten triplets into ``[rel_id, subject_end, object_end, ...]``.

        Entity positions are the token index of each span's *last* token in
        the tokenized ``text``.

        FIX: the return annotation previously claimed ``Dict[int, List[int]]``
        but the function has always returned a flat list; the unused local
        ``dic`` is removed.
        """
        tokens = self.hyper.tokenizer(text)
        result: List[int] = []
        for triplet in spo_list:
            obj_tokens = self.hyper.tokenizer(triplet["object"])
            subj_tokens = self.hyper.tokenizer(triplet["subject"])

            object_pos = find(tokens, obj_tokens) + len(obj_tokens) - 1
            subject_pos = find(tokens, subj_tokens) + len(subj_tokens) - 1
            relation_pos = self.relation_vocab[triplet["predicate"]]
            result.extend([relation_pos, subject_pos, object_pos])
        return result
Example 4
0
 def to_ent(outp):
     # Build two one-hot marker vectors over the closure's `tokens`:
     # one marking each entity's first token, one marking its last.
     # NOTE: reads `tokens` and `self` from the enclosing scope (side effect
     # of the closure, as the original comment warned).
     length = len(tokens)
     ent_begin = [0] * length
     ent_end = [0] * length
     for name in outp:
         piece = self.hyper.tokenizer(name)
         start = find(tokens, piece)  # renamed from `id` (shadowed builtin)
         ent_begin[start] = 1
         ent_end[start + len(piece) - 1] = 1
     return ent_begin, ent_end
Example 5
0
        def to_in_key(inp, name):
            # Map a raw field to its model-input key(s); reads `tokens` and
            # `self` from the enclosing scope (side effect, as noted before).
            # NOTE(review): the return shape is heterogeneous — a (0, 0)
            # sentinel for empty input, a single relation id for "predicate",
            # and a (start, end) token pair otherwise; callers must dispatch
            # on `name` accordingly.
            if not inp:
                return 0, 0
            if name == "predicate":
                return self.relation_vocab[inp]
            piece = self.hyper.tokenizer(inp)
            start = find(tokens, piece)
            return start, start + len(piece) - 1
Example 6
0
    def spo_to_bio(self, text: str, entities: List[str]) -> List[str]:
        """Produce a BIO tag sequence over the tokenized ``text`` marking
        every entity surface form in ``entities``.

        Each entity gets "B" on its first token and "I" on the rest; all
        other tokens stay "O".
        """
        # Renamed from the original's `text = self.hyper.tokenizer(text)`,
        # which shadowed the parameter.
        tokens = self.hyper.tokenizer(text)
        bio = ["O"] * len(tokens)
        for e in entities:
            piece = self.hyper.tokenizer(e)
            begin = find(tokens, piece)
            end = begin + len(piece) - 1

            # FIX: off-by-one — `end` is a token *index*, so it must be
            # strictly less than len(tokens); the old `end <= len(tokens)`
            # check admitted an out-of-range index that would IndexError in
            # the tagging loop below.
            assert end < len(tokens)
            # NOTE(review): if the entity is absent, `find` presumably
            # returns a not-found sentinel (e.g. -1) and the tags land at
            # wrong positions — confirm `find`'s contract.

            bio[begin] = "B"
            for i in range(begin + 1, end + 1):
                bio[i] = "I"
        return bio