def _generate_char_dict(self):
    """
    Generate the char dict and ord map dict json files from the lexicon list.

    Gathers every distinct single character used by the entries of
    ``self._lexicon_list`` and hands them to
    ``establish_char_dict.CharDictBuilder``, which is asked to write
    ``self._char_dict_path`` and ``self._ord_map_dict_path``.

    The characters are passed to the builder in sorted order. Iterating a set
    of strings yields an order that may differ between interpreter runs
    (string hashing is randomized per process), so a plain ``list(set)``
    could produce different output files for identical lexicons.

    :return: None
    """
    char_lexicon_set = set()
    for lexicon in self._lexicon_list:
        # Iterating a lexicon entry yields its single characters.
        char_lexicon_set.update(lexicon)

    log.info('Char set length: {:d}'.format(len(char_lexicon_set)))

    # Sort for a reproducible order.
    # NOTE(review): presumably the builder derives indices from list position;
    # confirm against CharDictBuilder.
    char_lexicon_list = sorted(char_lexicon_set)

    char_dict_builder = establish_char_dict.CharDictBuilder()
    char_dict_builder.write_char_dict(char_lexicon_list, save_path=self._char_dict_path)
    char_dict_builder.map_ord_to_index(char_lexicon_list, save_path=self._ord_map_dict_path)

    log.info('Write char dict map complete')
def generate_char_dict(char_dict_file, save_dir):
    """
    Produce the char dict json and both ord/index map json files.

    An example char dict file is stored in data/char_dict/chinese_dict.txt

    The three output files are placed in ``save_dir`` under the names
    ``char_dict.json``, ``ord_2_index_map.json`` and ``index_2_ord_map.json``;
    the actual writing is delegated to ``establish_char_dict.CharDictBuilder``.

    :param char_dict_file: char dict source handed unchanged to the builder
    :param save_dir: directory the three json paths are joined onto
    :return: None
    """
    builder = establish_char_dict.CharDictBuilder()

    dict_json, ord_to_index_json, index_to_ord_json = (
        ops.join(save_dir, file_name)
        for file_name in ('char_dict.json', 'ord_2_index_map.json', 'index_2_ord_map.json')
    )

    # Same call order as before: char dict, ord -> index, index -> ord.
    builder.write_char_dict(char_dict_file, dict_json)
    builder.map_ord_to_index(char_dict_file, ord_to_index_json)
    builder.map_index_to_ord(char_dict_file, index_to_ord_json)

    summary = 'Generate {:s}, {:s} and {:s} complete'.format(
        dict_json, ord_to_index_json, index_to_ord_json)
    print(summary)
def _generate_char_dict(self):
    """
    Build the char dict and ord map dict json files from the lexicon list.

    Collects the distinct single characters that occur in the entries of
    ``self._lexicon_list``, then asks ``establish_char_dict.CharDictBuilder``
    to write ``self._char_dict_path`` and ``self._ord_map_dict_path`` from
    that character list.

    The set is grown in place (``update``) instead of being rebuilt with
    ``union`` for every lexicon entry, and the characters are sorted before
    being handed to the builder, because set iteration order for strings may
    change from one interpreter run to the next.

    :return: None
    """
    char_lexicon_set = set()
    for lexicon in self._lexicon_list:
        # In-place update avoids copying the whole accumulated set per entry.
        char_lexicon_set.update(lexicon)

    log.info('Char set length: {:d}'.format(len(char_lexicon_set)))

    # Sort for a reproducible order.
    # NOTE(review): presumably the builder derives indices from list position;
    # confirm against CharDictBuilder.
    char_lexicon_list = sorted(char_lexicon_set)

    char_dict_builder = establish_char_dict.CharDictBuilder()
    char_dict_builder.write_char_dict(char_lexicon_list, save_path=self._char_dict_path)
    char_dict_builder.map_ord_to_index(char_lexicon_list, save_path=self._ord_map_dict_path)

    log.info('Write char dict map complete')