Beispiel #1
0
 def get_sememes_by_word(self, word, structured=False, lang="zh", merge=False, expanded_layer=-1):
     """
     Look up the HowNet annotations of a word and convert them to sememe trees.
     :param word: (str)word (en/zh/id) to search for in HowNet.
                   "I WANT ALL" or "*" requests the annotations of all words.
     :param structured: (bool)if True, return the structured sememe tree of each annotation.
     :param lang: (str)only used when structured == False. Language ("en"/"zh") of the sememe
                  names in the result; "zh" is mapped to the internal "ch" suffix.
     :param merge: (bool)only used when structured == False. Merge the results of multi-sense
                   words into one set of sememes per word.
     :param expanded_layer: (int)only used when structured == False. Number of layers to expand,
                             passed on to self._expand_tree. Defaults to -1 (expand all layers).
     :return: structured == True: list of {"word": annotation, "tree": tree}; annotations whose
              tree cannot be generated are reported on stdout and skipped.
              structured == False, merge == False: list of {"word": ..., "sememes": ...}.
              structured == False, merge == True: dict mapping each word to a set of sememes,
              or the bare set when exactly one word was collected.
     """
     entries = self[word]

     if structured:
         trees = []
         for entry in entries:
             try:
                 tree = GenSememeTree(entry["Def"], word)
             except Exception as e:
                 # Best effort: report the failing annotation and carry on with the rest.
                 print("Generate Sememe Tree Failed for", entry["No"])
                 print("Exception:", e)
             else:
                 trees.append({"word": entry, "tree": tree})
         return trees

     # Annotation fields use the "ch" suffix for Chinese.
     suffix = "ch" if lang == 'zh' else lang
     word_field = suffix + "_word"
     name_field = "name_" + suffix

     collected = {} if merge else []
     for entry in entries:
         try:
             key = entry[word_field]
             if merge:
                 bucket = collected.setdefault(key, set())
                 bucket.update(
                     self._expand_tree(GenSememeTree(entry["Def"], word), name_field, expanded_layer))
             else:
                 collected.append({
                     "word": key,
                     "sememes": self._expand_tree(GenSememeTree(entry["Def"], word), name_field, expanded_layer),
                 })
         except Exception as e:
             # Unlike the structured branch, a failure here aborts the whole query.
             print(word)
             print("Wrong Item:", entry)
             print("Exception:", e)
             raise e

     # A merged result holding a single word collapses to that word's sememe set.
     if merge and len(collected) == 1:
         (collected,) = collected.values()
     return collected
Beispiel #2
0
def get_trees(text: Text, pos='*') -> List[SenseTree]:
    """Build sense trees for the distinct HowNet definitions matching ``text``.

    :param text: query string; leading and trailing whitespace is stripped first.
    :param pos: forwarded unchanged to ``get_words`` (presumably a part-of-speech
                filter -- confirm against ``get_words``).
    :return: whatever ``build_trees`` produces for the parsed trees, or an empty
             list when ``text`` is blank.
    """
    text = text.strip()
    if not text:
        return []

    # Imported only once there is real work to do, so blank input never pays
    # for (or depends on) loading the parser module.
    from OpenHowNet.SememeTreeParser import GenSememeTree

    # Only distinct trees need to be parsed: for every "Def" remember the "No"
    # of the last entry carrying it, then keep just the entries with those ids.
    words = get_words(text, pos=pos)
    last_no_by_def = {item['Def']: item['No'] for item in words}
    # A set gives O(1) membership instead of a linear scan of the dict values
    # for every entry.
    kept_nos = set(last_no_by_def.values())

    trees = [
        {"word": item, "tree": GenSememeTree(item["Def"], text)}
        for item in words
        if item['No'] in kept_nos
    ]
    return build_trees(trees)
Beispiel #3
0
    def visualize_sememe_trees(self, word, K=None):
        """
        Print the sememe tree of every HowNet annotation retrieved for a word.

        :param word: (str)The target word to be visualized in command line. Notice that a single word may correspond to multiple HowNet annotations.
        :param K: (int)The maximum number of annotations to display, taken in ascending order of their "No" field.
                  Any value that is not an int >= 1 (None, strings, floats, bools, zero, negatives) is ignored and all retrieved results are displayed.
        :return: None. The result count (before truncation by K) and the trees are written to stdout.
        """
        annotations = sorted(self[word], key=lambda entry: entry["No"])
        print("Find {0} result(s)".format(len(annotations)))

        # Test the type before comparing: a non-numeric K must be ignored, not
        # raise TypeError from `K >= 1`.
        if type(K) is int and K >= 1:
            annotations = annotations[:K]

        for index, entry in enumerate(annotations):
            root = GenSememeTree(entry["Def"], word, returnNode=True)
            rendered = RenderTree(root)
            print("Display #{0} sememe tree".format(index))
            for pre, _fill, node in rendered:
                print("%s[%s]%s" % (pre, node.role, node.name))