def request_sougou_cloud_word(self, role_num):
    """Return the first Sogou cloud word matching a digit string.

    Input: a digit string. An integer is also accepted and is converted to
    its decimal string form before the lookup.
    Output: the first element of the result returned by
    ``sogou_cloud_words.get_cloud_words`` for that string.
    """
    import numbers
    import sogou_cloud_words

    # On Python 2, integers above sys.maxint are ``long`` rather than ``int``,
    # so a plain ``isinstance(role_num, int)`` check lets long role numbers
    # through unconverted. ``numbers.Integral`` covers both types.
    if isinstance(role_num, numbers.Integral):
        role_num = str(role_num)
    return sogou_cloud_words.get_cloud_words(role_num)[0]

# Commented-out usage scratchpad kept from the original file:
# sg = SouGouCloudWord(module_path='E:\SVN\chocolate_ime\script\gen_update_words\gen_hot_words')
# print sg.get_pinyin('数字串')
# print sg.form_sentence_to_role_num('数字串')#7489424826
# sg.request_sougou_cloud_word('7489424826')
# if __name__ == '__main__':
# vnd = VarifyNgramData('E:\SVN\chocolate_ime\script\gen_update_words\gen_hot_words')
#print vnd.get_pinyin_list('谁知道')
# print vnd.get_input_role('yes')
# prefix, mapping_word_list = vnd.get_code_sentence('快快乐乐的')
# print prefix, mapping_word_list
# mulit_word_pinyin_dic = gen_multi_word_pinyin_dic()
# print mulit_word_pinyin_dic[u'量']
# gw = GenMappingWordLengh()
def sougou_cloud_word(self):
    """Check sample sentences against Sogou cloud words and save the matches.

    Reads ``forum_high_freq_sentence_role_num.txt`` (UTF-8). Each non-blank
    line is split on whitespace: the first field is the sentence and the last
    field is its key. The key is sent to
    ``sogou_cloud_words.get_cloud_words``; when the first returned candidate
    equals the sentence, ``sentence<TAB>key`` is written to
    ``forum_high_freq_sentence_sougou_checkout.txt``.

    The 1-based number of every input line is printed as progress output.
    """
    import sogou_cloud_words

    src_data_path = r'E:\SVN\linguistic_model\N_gram\varify_sample_lvjun'
    varify_sample_filename = os.path.join(src_data_path, 'forum_high_freq_sentence_role_num.txt')
    des_path = r'E:\SVN\linguistic_model\N_gram\varify_sample_lvjun'
    checkout_sample_filename_backward = os.path.join(des_path, 'forum_high_freq_sentence_sougou_checkout.txt')

    with codecs.open(varify_sample_filename, encoding='utf-8') as f, \
            codecs.open(checkout_sample_filename_backward, mode='wb', encoding='utf-8') as wf:
        # Stream the file line by line instead of loading it all with readlines().
        for count, line in enumerate(f, 1):
            print(count)
            fields = line.split()
            if not fields:
                # A blank line has no sentence/key pair; indexing it would
                # raise IndexError and abort the whole run.
                continue
            sentence = fields[0]
            key_text = fields[-1]
            # The lookup receives the UTF-8 encoded key, as before.
            key_str = key_text.encode('utf-8')
            matched_sentence = sogou_cloud_words.get_cloud_words(key_str)[0]
            if matched_sentence == sentence:
                # Format from the decoded key so the output line is built
                # from text only, with no implicit ASCII decoding of bytes.
                wf.write(u'%s\t%s\n' % (sentence, key_text))
def get_pinyin(sentence):
    """Convert a sentence into its role-number string.

    Every element of ``sentence`` is passed to ``AddPinyin.get_pinyin``; the
    results are concatenated, and each alphabetic character of that string is
    translated through ``coding_map``. Non-alphabetic characters are dropped.
    """
    add_pinyin = AddPinyin()
    pinyin_str = ''.join(add_pinyin.get_pinyin(word) for word in sentence)
    return ''.join(coding_map[letter] for letter in pinyin_str if letter.isalpha())


def _print_first_result(url):
    """Fetch ``url``, parse the body as JSON and print its first element."""
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Always release the connection, even when reading fails.
        response.close()
    json_list = simplejson.loads(html)
    print(json_list[0])


def post_to_ali(role_num):
    """Query the remote service for ``role_num`` and print the first result."""
    _print_first_result('http://ali_0000.baiwenbao.com:5000/%s' % role_num)


def post_to_local(role_num):
    """Query the local service for ``role_num`` and print the first result."""
    _print_first_result('http://127.0.0.1:5000/%s' % role_num)


if __name__ == "__main__":
    start_time = time.time()
    sentence = u'学挖掘机到底哪家强中国山东找蓝翔'
    role_num = get_pinyin(sentence)
    # role_num = '945464983248432694224336335433826'
    # role_num = 'jintianyourenqudapingpangqiuma'
    post_to_ali(role_num)
    # post_to_local(role_num)
    print(get_cloud_words(role_num)[0])