Beispiel #1
0
 def get_all_sememes(self):
     """
     Get the complete sememe list in HowNet.

     The list is read once from the ``sememe_all.txt`` resource and cached on
     the instance as ``self.sememe_all``; later calls return the cached list.
     Only the first line of the resource is used, split on whitespace. An
     empty resource yields an empty list.

     :return: (List) a list of sememes
     """
     if hasattr(self, "sememe_all"):
         return self.sememe_all
     # Use a context manager so the resource handle is closed as soon as the
     # first line has been read, instead of leaking it.
     with get_resource("sememe_all.txt", 'r') as f:
         first_line = f.readline()
     self.sememe_all = first_line.strip().split()
     return self.sememe_all
Beispiel #2
0
 def _load_taxonomy(self):
     """
     Load the sememe taxonomy triples from ``sememe_triples_taxonomy.txt``.

     Each non-blank line is split on single spaces; the first three fields
     are read as ``head relation tail``. ``head`` and ``tail`` may each hold
     several sememes separated by ``|``. Blank lines are skipped.

     Populates two attributes:

     * ``self.sememe_taxonomy`` maps every ``(head_sememe, tail_sememe)``
       pair to the relation string.
     * ``self.sememe_dict`` maps ``(head_sememe, relation)`` to a list of
       the raw tail fields (the tail field is stored unsplit).

     :raises IndexError: if a non-blank line has fewer than three fields.
     """
     # The context manager guarantees the resource handle is closed even if
     # a malformed line raises part-way through.
     with get_resource("sememe_triples_taxonomy.txt", "r") as f:
         self.sememe_taxonomy = {}
         self.sememe_dict = {}
         for raw_line in f:
             stripped = raw_line.strip()
             if not stripped:
                 continue
             fields = stripped.split(" ")
             heads, relation, tail = fields[0], fields[1], fields[2]
             # Split the tail once per line rather than once per head sememe.
             tail_sememes = tail.split("|")
             for u in heads.split("|"):
                 for v in tail_sememes:
                     self.sememe_taxonomy[(u, v)] = relation
                 self.sememe_dict.setdefault((u, relation), []).append(tail)
Beispiel #3
0
    def __init__(self, use_sim=False):
        """
        Build the HowNet lookup tables from the pickled dictionary resource.

        Fills ``self.en_map``, ``self.zh_map`` and ``self.ids``, which group
        the loaded entries by stripped ``"en_word"``, stripped ``"ch_word"``
        and ``"No"`` value respectively (each key maps to a list of entries).
        A ``FileNotFoundError`` raised anywhere in this process is printed
        rather than propagated.

        :param use_sim: when true, also call
            ``initialize_sememe_similarity_calculation()``; if that returns a
            falsy value, ``hownet``, ``sememe_root`` and ``sememe_sim_table``
            are set to ``None``.
        """
        try:
            here = os.path.dirname(os.path.abspath(__file__))
            resource_path = os.path.join(here, "HowNet_dict_complete")
            self.en_map = {}
            self.zh_map = {}
            self.ids = {}

            # NOTE(review): pickle.load can execute arbitrary code; this is
            # only safe if the resource is a trusted, package-provided file.
            with get_resource(resource_path, 'rb') as handle:
                entries = pickle.load(handle)

            for entry in entries.values():
                english = entry["en_word"].strip()
                chinese = entry["ch_word"].strip()
                self.en_map.setdefault(english, []).append(entry)
                self.zh_map.setdefault(chinese, []).append(entry)
                # Several entries may share the same "No", so keep a list.
                self.ids.setdefault(entry["No"], []).append(entry)

            if use_sim and not self.initialize_sememe_similarity_calculation():
                self.hownet = None
                self.sememe_root = None
                self.sememe_sim_table = None
        except FileNotFoundError as err:
            print(err)