def get_sememes_by_word(self, word, structured=False, lang="zh", merge=False, expanded_layer=-1):
    """
    Look up the HowNet annotations of ``word`` and convert them into sememe
    trees (structured mode) or expanded sememe collections (default mode).

    :param word: (str) the word (en/zh/id) to search for in HowNet. It is used
        as the lookup key ``self[word]``. You can use "I WANT ALL" or "*" to
        request the annotations of all words.
    :param structured: (bool) when True, return one structured sememe tree per
        annotation; ``lang``, ``merge`` and ``expanded_layer`` are then ignored.
    :param lang: (str) only used when structured == False. Language of the
        sememe names and of the returned word ("en"/"zh"). "zh" is mapped to
        the "ch" key suffix used by the annotation records.
    :param merge: (bool) only used when structured == False. Whether to merge
        the sememes of all senses that share the same word into one set.
    :param expanded_layer: (int) only used when structured == False. Number of
        layers to expand, forwarded to ``self._expand_tree``.
        Defaults to -1 (expand all layers).
    :return:
        * structured: list of ``{"word": <annotation>, "tree": <sememe tree>}``;
          annotations whose tree cannot be generated are reported on stdout
          and skipped.
        * not structured, merge == False: list of
          ``{"word": <word>, "sememes": <expanded sememes>}``.
        * not structured, merge == True: dict mapping each word to the set of
          its sememes; when the dict would hold exactly one word, that word's
          set is returned directly instead of the dict.
    :raises Exception: in non-structured mode any failure while processing an
        annotation is printed and then re-raised.
    """
    annotations = self[word]

    if structured:
        trees = []
        for item in annotations:
            try:
                trees.append({"word": item, "tree": GenSememeTree(item["Def"], word)})
            except Exception as e:
                # Best effort: report the failing annotation and keep going.
                print("Generate Sememe Tree Failed for", item["No"])
                print("Exception:", e)
        return trees

    # Annotation records use the "ch" suffix for Chinese fields.
    lang_code = "ch" if lang == "zh" else lang
    word_key = lang_code + "_word"
    sememe_name_key = "name_" + lang_code

    result = {} if merge else []
    for item in annotations:
        try:
            if merge:
                # Accumulate the sememes of every sense under the same word.
                result.setdefault(item[word_key], set()).update(
                    self._expand_tree(GenSememeTree(item["Def"], word), sememe_name_key, expanded_layer))
            else:
                result.append(
                    {"word": item[word_key],
                     "sememes": self._expand_tree(GenSememeTree(item["Def"], word), sememe_name_key,
                                                  expanded_layer)})
        except Exception as e:
            print(word)
            print("Wrong Item:", item)
            print("Exception:", e)
            raise e

    if merge and len(result) == 1:
        # A single merged word: hand back its sememe set without the wrapper dict.
        (result,) = result.values()
    return result
def get_trees(text: Text, pos='*') -> List[SenseTree]:
    """
    Build the sense trees for the distinct HowNet definitions of ``text``.

    :param text: the word to look up; leading/trailing whitespace is stripped
        before use.
    :param pos: forwarded unchanged to ``get_words`` (presumably a
        part-of-speech filter, '*' meaning any -- confirm against get_words).
    :return: the result of ``build_trees`` over the parsed trees, or an empty
        list when ``text`` is empty or whitespace only.
    """
    text = text.strip()
    if not text:
        return []

    # Imported only once there is something to parse, so blank input returns
    # without loading the parser package.
    from OpenHowNet.SememeTreeParser import GenSememeTree

    words = get_words(text, pos=pos)

    # Only distinct trees need to be parsed: for every "Def" remember the "No"
    # of its last occurrence and keep just the entries carrying one of those
    # numbers. The numbers are collected into a set so each membership test is
    # O(1) instead of a linear scan over dict values.
    # NOTE(review): assumes "No" values are hashable (e.g. str/int) -- confirm.
    last_no_by_def = {item['Def']: item['No'] for item in words}
    kept_nos = set(last_no_by_def.values())

    trees = [
        {"word": item, "tree": GenSememeTree(item["Def"], text)}
        for item in words
        if item['No'] in kept_nos
    ]
    return build_trees(trees)
def visualize_sememe_trees(self, word, K=None):
    """
    Print the sememe tree of every HowNet annotation found for ``word``.

    :param word: (str) the target word to be visualized on the command line.
        Notice that a single word may correspond to multiple HowNet annotations.
    :param K: (int) the maximum number of annotations to display, taken in
        ascending order of their "No" field. Any value that is not an ``int``
        greater than or equal to 1 (None, strings, floats, booleans, ...) is
        ignored and all retrieved results are displayed.
    :return: None. Everything is written to stdout.
    """
    annotations = sorted(self[word], key=lambda x: x["No"])
    print("Find {0} result(s)".format(len(annotations)))

    # Check the type BEFORE comparing: a non-numeric K (e.g. "3") must be
    # ignored as documented rather than raise TypeError on `K >= 1`.
    # `type(K) is int` (not isinstance) deliberately keeps bool out.
    if type(K) is int and K >= 1:
        annotations = annotations[:K]

    for index, item in enumerate(annotations):
        tree = GenSememeTree(item["Def"], word, returnNode=True)
        tree = RenderTree(tree)
        print("Display #{0} sememe tree".format(index))
        # Each rendered row yields a drawing prefix, a fill string and the node.
        for pre, fill, node in tree:
            print("%s[%s]%s" % (pre, node.role, node.name))