def get_input_rules_from_giving_words(input_words):
    '''Print the pinyin and keypad digit sequence for the given Chinese words.

    Args:
        input_words: The words to convert. A byte string is decoded as
            UTF-8 first; text is used unchanged.

    Prints one tab-separated line to stdout: the words, their
    space-separated pinyin, and the digit sequence obtained by mapping every
    pinyin letter to its phone-keypad digit. Returns None.

    Raises:
        KeyError: if the pinyin contains an alphabetic character other than
            the lowercase letters a-z.
    '''
    # Explicit type check instead of assert + bare except: asserts vanish
    # under -O and a bare except hides unrelated errors.
    if isinstance(input_words, bytes):
        input_words = input_words.decode('utf-8')

    # Letter -> digit, following the phone keypad grouping
    # (abc=2, def=3, ghi=4, jkl=5, mno=6, pqrs=7, tuv=8, wxyz=9).
    coding_map = {
        'a': '2', 'b': '2', 'c': '2',
        'd': '3', 'e': '3', 'f': '3',
        'g': '4', 'h': '4', 'i': '4',
        'j': '5', 'k': '5', 'l': '5',
        'm': '6', 'n': '6', 'o': '6',
        'p': '7', 'q': '7', 'r': '7', 's': '7',
        't': '8', 'u': '8', 'v': '8',
        'w': '9', 'x': '9', 'y': '9', 'z': '9',
    }

    ws = WordsSearch()
    # Only the first element returned by get_splited_pinyin is used; '*'
    # characters are dropped (presumably a marker added by WordsSearch --
    # TODO confirm).
    pinyin_str = ' '.join(ws.get_splited_pinyin(input_words)[0]).replace('*', '')
    # Spaces and any other non-letters are skipped when building the digits.
    role_num = ''.join(
        coding_map[letter] for letter in pinyin_str if letter.isalpha())

    # input_words is already text here, so it is emitted as-is; decoding it a
    # second time would fail on non-ASCII characters. The parenthesised
    # single-argument print behaves the same as the print statement.
    print('\t'.join((input_words, pinyin_str, role_num)))
def convert_pinyin_to_rules():
    '''Convert the base lexicon's pinyin into input rules (digit sequences).

    Reads 'combine_top60000_and_5041.txt' from src_file_path (UTF-8), takes
    the first tab-separated field of every line as the word, and writes
    'combine_top60000_and_5041_pinyin_role.txt' to the same directory. Each
    output line is "word<TAB>pinyin<TAB>digits", where digits maps every
    pinyin letter to its phone-keypad digit. Blank input lines are skipped.

    Raises:
        KeyError: if a pinyin string contains an alphabetic character other
            than the lowercase letters a-z.
    '''
    # Letter -> digit, following the phone keypad grouping
    # (abc=2, def=3, ghi=4, jkl=5, mno=6, pqrs=7, tuv=8, wxyz=9).
    coding_map = {
        'a': '2', 'b': '2', 'c': '2',
        'd': '3', 'e': '3', 'f': '3',
        'g': '4', 'h': '4', 'i': '4',
        'j': '5', 'k': '5', 'l': '5',
        'm': '6', 'n': '6', 'o': '6',
        'p': '7', 'q': '7', 'r': '7', 's': '7',
        't': '8', 'u': '8', 'v': '8',
        'w': '9', 'x': '9', 'y': '9', 'z': '9',
    }
    ws = WordsSearch()
    base_filename = os.path.join(src_file_path, 'combine_top60000_and_5041.txt')
    base_file_with_pinyin = os.path.join(
        src_file_path, 'combine_top60000_and_5041_pinyin_role.txt')

    with codecs.open(base_filename, encoding='utf-8') as f, \
            codecs.open(base_file_with_pinyin, mode='wb', encoding='utf-8') as wf:
        # Stream the lexicon line by line instead of loading it all at once.
        for item in f:
            # Strip the line ending first so a line without a tab does not
            # leak its newline into the word (and into the output row).
            word = item.rstrip('\r\n').split('\t')[0]
            if not word:
                continue
            # Only the first element returned by get_splited_pinyin is used;
            # '*' characters are dropped (presumably a marker added by
            # WordsSearch -- TODO confirm).
            pinyin_str = ' '.join(ws.get_splited_pinyin(word)[0]).replace('*', '')
            role_num = ''.join(
                coding_map[letter] for letter in pinyin_str if letter.isalpha())
            wf.write('\t'.join((word, pinyin_str, role_num)) + '\n')
def convert_pinyin_to_rules():
    '''Write the keypad digit sequence for every word of the base lexicon.

    Input:  src_file_path/combine_top60000_and_5041.txt (UTF-8); the word is
            the text before the first tab on each line.
    Output: src_file_path/combine_top60000_and_5041_pinyin_role.txt (UTF-8);
            one "word<TAB>pinyin<TAB>digits" row per non-blank input line.

    The digits are produced by replacing each letter of the pinyin with the
    phone-keypad digit that carries it; non-letters (e.g. the spaces between
    syllables) are ignored.

    Raises:
        KeyError: if a pinyin string contains an alphabetic character other
            than the lowercase letters a-z.
    '''
    # Keypad layout, digit -> letters; inverted below into letter -> digit.
    keypad = {
        '2': 'abc', '3': 'def', '4': 'ghi', '5': 'jkl',
        '6': 'mno', '7': 'pqrs', '8': 'tuv', '9': 'wxyz',
    }
    coding_map = {}
    for digit, letters in keypad.items():
        for letter in letters:
            coding_map[letter] = digit

    ws = WordsSearch()
    source_path = os.path.join(src_file_path, 'combine_top60000_and_5041.txt')
    target_path = os.path.join(
        src_file_path, 'combine_top60000_and_5041_pinyin_role.txt')

    with codecs.open(source_path, encoding='utf-8') as src, \
            codecs.open(target_path, mode='wb', encoding='utf-8') as dst:
        # Iterate the reader directly; the whole file never has to be in memory.
        for line in src:
            # Drop the line terminator before taking the first column, so
            # single-column lines do not keep a trailing newline in the word.
            word = line.rstrip('\r\n').partition('\t')[0]
            if not word:
                # Blank line (or empty first column): nothing to convert.
                continue
            # First element of get_splited_pinyin's result, joined by spaces.
            # '*' is removed -- presumably a WordsSearch placeholder; TODO
            # confirm.
            syllables = ws.get_splited_pinyin(word)[0]
            pinyin_str = ' '.join(syllables).replace('*', '')
            digits = [coding_map[ch] for ch in pinyin_str if ch.isalpha()]
            role_num = ''.join(digits)
            dst.write('\t'.join((word, pinyin_str, role_num)) + '\n')
def get_input_rules_from_giving_words(input_words):
    '''Take Chinese words and print their corresponding input rule.

    Args:
        input_words: Words to look up. Byte strings are decoded as UTF-8;
            text is used as given.

    Output (stdout): a single line "words<TAB>pinyin<TAB>digits", where
    digits is the pinyin with each letter replaced by the phone-keypad digit
    that carries it and everything else (e.g. spaces) removed. Returns None.

    Raises:
        KeyError: if the pinyin contains an alphabetic character other than
            the lowercase letters a-z.
    '''
    # Decode only real byte strings. An assert wrapped in a bare except would
    # be disabled by -O and would mask unrelated failures.
    if isinstance(input_words, bytes):
        input_words = input_words.decode('utf-8')

    # Keypad layout, digit -> letters; inverted below into letter -> digit.
    keypad = {
        '2': 'abc', '3': 'def', '4': 'ghi', '5': 'jkl',
        '6': 'mno', '7': 'pqrs', '8': 'tuv', '9': 'wxyz',
    }
    coding_map = {}
    for digit, letters in keypad.items():
        for letter in letters:
            coding_map[letter] = digit

    ws = WordsSearch()
    # First element of get_splited_pinyin's result, joined by spaces. '*' is
    # removed -- presumably a WordsSearch placeholder; TODO confirm.
    syllables = ws.get_splited_pinyin(input_words)[0]
    pinyin_str = ' '.join(syllables).replace('*', '')
    digits = [coding_map[ch] for ch in pinyin_str if ch.isalpha()]
    role_num = ''.join(digits)

    # Emit the (already decoded) argument itself. Referring to any other name
    # here raises NameError, and decoding text again breaks on non-ASCII
    # characters. A parenthesised single argument prints the same as the
    # print statement.
    print('\t'.join((input_words, pinyin_str, role_num)))