def get_all_sememes(self):
    """
    Get the complete sememe list in HowNet.

    On the first call the list is read from the ``sememe_all.txt`` resource
    and cached on the instance as ``self.sememe_all``; every later call
    returns the cached list without touching the file again.

    :return: (List) a list of sememes, i.e. the whitespace-separated tokens
        found on the first line of the resource (an empty list when the
        resource is empty)
    """
    if hasattr(self, "sememe_all"):
        return self.sememe_all

    # Only the first line is needed; the context manager guarantees the
    # handle is closed even if reading fails.
    with get_resource("sememe_all.txt", 'r') as f:
        first_line = f.readline()

    self.sememe_all = first_line.strip().split()
    return self.sememe_all
def _load_taxonomy(self):
    """
    Load the sememe taxonomy triples from ``sememe_triples_taxonomy.txt``.

    Every non-blank line is stripped and split on single spaces; the first
    three fields are read as ``head relation tail`` and any further fields
    are ignored. ``head`` and ``tail`` may each contain several names
    separated by ``|``. Two lookup tables are (re)built on the instance:

    * ``self.sememe_taxonomy`` maps ``(head_name, tail_name)`` to the
      relation, for every pairing of a head name with a tail name.
    * ``self.sememe_dict`` maps ``(head_name, relation)`` to a list of
      tails, where each tail is the raw, unsplit third field.

    :raises IndexError: if a non-blank line has fewer than three fields
    """
    # The context manager closes the resource even when a malformed line
    # aborts the load.
    with get_resource("sememe_triples_taxonomy.txt", "r") as f:
        self.sememe_taxonomy = {}
        self.sememe_dict = {}
        for raw_line in f:
            stripped = raw_line.strip()
            if not stripped:
                # Tolerate blank (e.g. trailing) lines instead of crashing.
                continue
            fields = stripped.split(" ")
            heads, relation, tail = fields[0], fields[1], fields[2]
            for u in heads.split("|"):
                for v in tail.split("|"):
                    self.sememe_taxonomy[(u, v)] = relation
                # The tail is stored unsplit, once per head name.
                self.sememe_dict.setdefault((u, relation), []).append(tail)
def __init__(self, use_sim=False):
    """
    Initialize HowNetDict.

    Unpickles the ``HowNet_dict_complete`` resource located next to this
    module and indexes every entry three ways:

    * ``self.en_map``: stripped ``en_word`` -> list of entries
    * ``self.zh_map``: stripped ``ch_word`` -> list of entries
    * ``self.ids``: ``No`` -> list of entries

    A ``FileNotFoundError`` raised anywhere in this process is printed and
    swallowed, so the instance keeps whatever attributes were set before
    the failure.

    :param use_sim: "lazy" option for loading similarity computation file.
        When true, ``initialize_sememe_similarity_calculation()`` is called;
        if it returns a falsy value, ``hownet``, ``sememe_root`` and
        ``sememe_sim_table`` are set to ``None``.
    """
    try:
        here = os.path.dirname(os.path.abspath(__file__))
        dict_path = os.path.join(here, "HowNet_dict_complete")

        self.en_map = {}
        self.zh_map = {}
        self.ids = {}

        # Load the complete dictionary.
        with get_resource(dict_path, 'rb') as handle:
            entries = pickle.load(handle)

        for entry in entries.values():
            english = entry["en_word"].strip()
            chinese = entry["ch_word"].strip()
            self.en_map.setdefault(english, []).append(entry)
            self.zh_map.setdefault(chinese, []).append(entry)
            self.ids.setdefault(entry["No"], []).append(entry)

        if use_sim and not self.initialize_sememe_similarity_calculation():
            self.hownet = None
            self.sememe_root = None
            self.sememe_sim_table = None
    except FileNotFoundError as err:
        print(err)