コード例 #1
0
 def get_middle_names(self) -> set:
     """Return the cached set of normalized middle names.

     On the first call the entries of ``middle_names.txt`` (resolved with
     ``get_data_file`` in the ``vietnamese`` folder) are read, normalized
     and stored on the instance; later calls return that same set object.

     Returns:
         set: ``normalize(x)`` for every non-blank entry yielded by
         ``readlines`` for the data file.
     """
     if self.__middle_names is None:
         filename = get_data_file("middle_names.txt", folder="vietnamese")
         logging.info("%s loaded", filename)
         # Blank entries are skipped, matching the other word-list loaders
         # in this class, so an empty line never becomes a set member.
         self.__middle_names = {
             normalize(x) for x in readlines(filename) if x.strip()
         }
     return self.__middle_names
コード例 #2
0
 def get_short_word(self) -> set:
     """Return the cached set of normalized short words.

     On the first call the entries of ``short_words.txt`` (resolved with
     ``get_data_file`` in the ``vietnamese`` folder) are read, normalized
     and stored on the instance; later calls return that same set object.

     Returns:
         set: ``normalize(x)`` for every non-blank entry yielded by
         ``readlines`` for the data file.
     """
     if self.__short_word is None:
         filename = get_data_file("short_words.txt", folder="vietnamese")
         logging.info("%s loaded", filename)
         # A set comprehension de-duplicates directly; no intermediate
         # list or dict is needed.
         self.__short_word = {
             normalize(x) for x in readlines(filename) if x.strip()
         }
     return self.__short_word
コード例 #3
0
 def get_first_sent_word(self) -> set:
     """Return the cached set of normalized entries from first_words.txt.

     On the first call ``first_words.txt`` is looked up under the
     instance's ``__cur_dir`` directory, its entries are normalized and the
     resulting set is stored on the instance; later calls return that same
     set object.

     Returns:
         set: ``normalize(x)`` for every non-blank entry yielded by
         ``readlines`` for the file.
     """
     if self.__first_sent_word is None:
         filename = path.join(self.__cur_dir, "first_words.txt")
         logging.info("%s loaded", filename)
         # A set comprehension de-duplicates directly; no intermediate
         # list or dict is needed.
         self.__first_sent_word = {
             normalize(x) for x in readlines(filename) if x.strip()
         }
     return self.__first_sent_word
コード例 #4
0
ファイル: vietnamese.py プロジェクト: microvnn/language_vn
 def get_family_names(self) -> set:
     """Return the cached set of normalized family names.

     On the first call the entries of ``family_names.txt`` (resolved with
     ``get_data_file`` in the ``vietnamese`` folder) are read, normalized
     and stored on the instance; later calls return that same set object.

     Returns:
         set: ``normalize(x)`` for every non-blank entry yielded by
         ``readlines`` for the data file.
     """
     if self.__family_names is None:
         filename = get_data_file("family_names.txt", folder="vietnamese")
         # A set comprehension de-duplicates directly; no intermediate
         # list or dict is needed.
         self.__family_names = {
             normalize(x) for x in readlines(filename) if x.strip()
         }
     return self.__family_names
コード例 #5
0
 def get_location(self) -> set:
     """Return the cached set of normalized entries from the lv2 files.

     On the first call ``loc.lv2.txt`` and ``loc.lv2.fix.txt`` are looked
     up under the instance's ``__cur_dir`` directory, their entries are
     normalized and merged into one set that is stored on the instance;
     later calls return that same set object.

     Returns:
         set: ``normalize(x)`` for every non-blank entry yielded by
         ``readlines`` across both files.
     """
     if self.__location is None:
         # A tuple gives a fixed load (and log) order; iterating a set of
         # strings is not guaranteed to be stable between interpreter runs.
         files = ("loc.lv2.txt", "loc.lv2.fix.txt")
         entries = set()
         for f in files:
             filename = path.join(self.__cur_dir, f)
             logging.info("%s loaded", filename)
             entries.update(
                 normalize(x) for x in readlines(filename) if x.strip()
             )
         self.__location = entries
     return self.__location
コード例 #6
0
 def get_given_name(self) -> set:
     """Return the cached set of normalized entries from the name files.

     On the first call ``names.txt`` and ``company.txt`` (each resolved
     with ``get_data_file`` in the ``vocals`` folder) are read, their
     entries are normalized and merged into one set that is stored on the
     instance; later calls return that same set object.

     Returns:
         set: ``normalize(x)`` for every non-blank entry yielded by
         ``readlines`` across both files.
     """
     if self.__given_names is None:
         # A tuple gives a fixed load (and log) order; iterating a set of
         # strings is not guaranteed to be stable between interpreter runs.
         files = ("names.txt", "company.txt")
         entries = set()
         for f in files:
             filename = get_data_file(f, folder="vocals")
             logging.info("%s loaded", filename)
             entries.update(
                 normalize(x) for x in readlines(filename) if x.strip()
             )
         self.__given_names = entries
     return self.__given_names
コード例 #7
0
    def get_location_lv3(self) -> tuple:
        """Return the cached ``(max_length, entries)`` pair for loc.lv3.txt.

        On the first call ``loc.lv3.txt`` is looked up under the instance's
        ``__cur_dir`` directory and its entries are normalized; the result
        is stored on the instance and later calls return that same tuple.

        Returns:
            tuple: ``(max_length, entries)`` where ``entries`` is the set of
            ``normalize(x)`` for every non-blank entry yielded by
            ``readlines``, and ``max_length`` is the largest
            whitespace-separated word count among those entries (``0`` when
            there are none).
        """
        if self.__location_lv_3 is None:
            filename = path.join(self.__cur_dir, "loc.lv3.txt")
            logging.info("%s loaded", filename)
            entries = {
                normalize(x) for x in readlines(filename) if x.strip()
            }
            # default=0 keeps an empty (or all-blank) file from raising
            # ValueError, which max() does on an empty iterable.
            max_length = max((len(x.split()) for x in entries), default=0)
            self.__location_lv_3 = (max_length, entries)
        return self.__location_lv_3
コード例 #8
0
 def get_vn_dict(self) -> set:
     """Return the cached set of normalized vocabulary entries.

     On the first call the four vocabulary files listed below (each
     resolved with ``get_data_file`` in the ``vocals`` folder) are read,
     their entries are normalized and merged into one set that is stored
     on the instance; later calls return that same set object.

     Returns:
         set: ``normalize(x)`` for every non-blank entry yielded by
         ``readlines`` across all files.
     """
     if self.__vn_dict is None:
         # A tuple gives a fixed load (and log) order; iterating a set of
         # strings is not guaranteed to be stable between interpreter runs.
         files = (
             "vocabulary_standard.txt",
             "vocabulary_build.txt",
             "vocabulary.txt",
             "animal.txt",
         )
         entries = set()
         for f in files:
             filename = get_data_file(f, folder="vocals")
             logging.info("%s loaded", filename)
             entries.update(
                 normalize(x) for x in readlines(filename) if x.strip()
             )
         self.__vn_dict = entries
     return self.__vn_dict
コード例 #9
0
    def get_hard_dict(self) -> dict:
        """Return the cached phrase index built from hard_dict.txt.

        On the first call the entries of ``hard_dict.txt`` (resolved with
        ``get_data_file`` in the ``vietnamese`` folder) are normalized and
        grouped by their first two whitespace-separated words; the index is
        stored on the instance and later calls return that same dict.

        Returns:
            dict: Maps each key (the first two words of a normalized entry,
            joined by a single space) to
            ``{"max_len": int, "data": set}``, where ``data`` holds every
            normalized entry sharing that key and ``max_len`` is the
            largest word count among them.
        """
        if self.__hard_dict is None:
            filename = get_data_file("hard_dict.txt", folder="vietnamese")
            logging.info("%s loaded", filename)
            index = {}
            for line in readlines(filename):
                if not line.strip():
                    continue
                phrase = normalize(line)
                words = phrase.split()
                # Entries are bucketed by their leading (up to) two words.
                key = " ".join(words[:2])
                bucket = index.get(key)
                if bucket is None:
                    bucket = index[key] = {"max_len": 0, "data": set()}
                bucket["data"].add(phrase)
                bucket["max_len"] = max(bucket["max_len"], len(words))
            self.__hard_dict = index
        return self.__hard_dict