コード例 #1
0
    def load_dictionary(self, source, language, dict_path):
        """Read an entity dictionary file and fill the lookup tables.

        Each line of the file is stripped and split on a double tab. A line
        must yield exactly four fields, in this order: title, sub title,
        URIs (several URIs joined with ``::;``) and entity id. Lines with
        any other number of fields are skipped and not counted.

        For every accepted line an ``Entity`` is built and registered in:

        * ``self._uri_2_id``       -- each URI -> entity id
        * ``self._fulltitle_2_id`` -- the entity's full title -> entity id
        * ``self._mention_2_ids``  -- mention derived from the full title ->
          dict whose keys are entity ids (values are always ``None``)
        * ``self.entity_dict``     -- entity id -> ``Entity``

        Later lines overwrite earlier entries that share the same key.
        Progress, the number of loaded entities and the elapsed time are
        printed to stdout.

        Args:
            source: Passed through unchanged to the ``Entity`` constructor.
            language: Passed through unchanged to the ``Entity`` constructor.
            dict_path: Path of the UTF-8 encoded dictionary file.
        """
        loaded = 0
        began = int(time.time())
        print("\nLoading entity_dictionary from: {}".format(dict_path))

        with open(dict_path, "r", encoding="utf-8") as handle:
            for raw in handle:
                fields = raw.strip().split("\t\t")
                if len(fields) != 4:
                    # Not a well-formed record; ignore it without counting.
                    continue

                title, sub_title, uri_field, entity_id = fields
                loaded += 1

                entity = Entity(entity_id, title, sub_title, source, language)

                # Every URI listed for the entity resolves to the same id.
                for uri in uri_field.split("::;"):
                    self._uri_2_id[uri] = entity_id

                self._fulltitle_2_id[entity.get_full_title()] = entity_id

                mention = self.get_mention_from_title(entity.get_full_title())
                candidates = self._mention_2_ids.get(mention)
                if candidates is None:
                    # First entity seen for this mention: start its id table.
                    candidates = self._mention_2_ids[mention] = dict()
                # The inner dict's keys hold the ids; the value is unused.
                candidates[entity_id] = None

                self.entity_dict[entity_id] = entity

        elapsed = datetime.timedelta(seconds=int(time.time()) - began)
        print("Loaded, #{}, time: {}.".format(loaded, str(elapsed)))