def qa(question):
    """Answer a single question and print the reply to stdout.

    A fresh AIML kernel is built on every call from the rule files in the
    ``resources`` directory next to this module.  The question is segmented
    with ``T.wordSegment`` and passed to the kernel; depending on the
    kernel's response the reply comes from an AIML template, from
    ``baike.query`` or from ``search_summary.kwquery``.

    Args:
        question: The user's question as text.

    Returns:
        None.  All output is printed or sent to the module-level loggers.

    Raises:
        SystemExit: if the segmented question is exactly ``'q'``.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Load the AIML rule files that live next to this module.
    mybot = aiml.Kernel()
    resource_dir = os.path.split(os.path.realpath(__file__))[0] + "/resources/"
    for rule_file in (
            "std-startup.xml",
            "bye.aiml",
            "tools.aiml",
            "bad.aiml",
            "funny.aiml",
            "OrdinaryQuestion.aiml",
            "Common conversation.aiml",
    ):
        mybot.learn(resource_dir + rule_file)

    input_message = question

    # Over-long and blank questions get the canned reply and nothing else.
    # Without the early return the rejected input would still be segmented,
    # matched and searched after the warning was printed.
    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
        return
    if input_message.strip() == '':
        print(mybot.respond("无"))
        return

    print(input_message)
    # Segment the question (original author's note: strips punctuation).
    message = T.wordSegment(input_message)
    print('word Seg:' + message)
    print('词性:')
    # The tagging result is never used; the call is kept in case it has
    # side effects.  NOTE(review): confirm and drop if it is pure.
    T.postag(input_message)

    if message == 'q':
        exit()

    response = mybot.respond(message)
    print("=======")
    print(response)
    print("=======")

    # No template produced anything: fall back to the canned "no answer".
    if response == "":
        ans = mybot.respond('找不到答案')
        print("{0}:{1}".format(robot_id, ans))
        return

    # A plain template reply (no leading '#') is the final answer.
    if response[0] != '#':
        print("{}: {}".format(robot_id, response))
        return

    # Responses starting with '#' are directives asking for a lookup.
    if "searchbaike" in response:
        # Directive layout used below: <tag>:<entity>:<attribute>
        print("search from baike")
        print(response)
        res = response.split(':')
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        print(entity + '<---->' + attr)
        ans = baike.query(entity, attr)

        if isinstance(ans, list):
            # Encyclopedia hit: print it and stop.  Falling through would
            # print the same list again as "uncertain candidates".
            print("{0}:{1}".format(robot_id, QAT.ptranswer(ans, False)))
            return

        # Only byte strings need decoding; text strings have no usable
        # ``decode`` (Python 3) or would be re-encoded as ASCII (Python 2).
        text = ans.decode('utf-8') if isinstance(ans, bytes) else ans
        if u'::找不到' in text:
            # Encyclopedia miss: fall back to the general search summary.
            print("通用搜索")
            # NOTE(review): every other logging call here uses ``logs``;
            # confirm that ``log`` is a real, distinct logger.
            log.info("通用搜索")
            ans = search_summary.kwquery(input_message)
    elif "NoMatchingTemplate" in response:
        # No template matched at all: general search.
        print("NoMatchingTemplate")
        ans = search_summary.kwquery(input_message)
    else:
        # Unknown directive: treat as "no answer" instead of failing on an
        # unbound ``ans`` below.
        ans = []

    if not ans:
        # NOTE(review): this branch only logs; nothing is printed for the
        # user.  Kept as-is because the intent is unclear.
        ans = mybot.respond('找不到答案')
        logs.info("{0}:{1}".format(robot_id, ans))
    elif len(ans) > 1:
        # Several candidates: log the location and print all of them.
        logs.info(sys.exc_info())
        logs.info("不确定候选答案")
        frame = sys._getframe()
        logs.info("[{0}][func:{1}][line:{2}]:不确定候选答案".format(
            frame.f_code.co_filename,
            frame.f_code.co_name,
            frame.f_lineno))
        print(robot_id + ': ')
        for candidate in ans:
            print(candidate)
    else:
        print('{0}:{1}'.format(robot_id, ans[0]))
def _answer_with_bot(question, answers):
    """Print the chatbot's reply to *question*.

    The question is segmented with ``T.wordSegment`` and passed to the
    externally defined ``mybot`` kernel.  Depending on the kernel's response
    the reply comes from an AIML template, from ``baike.query`` or from
    ``search_summary.kwquery``.

    Args:
        question: Question text.
        answers: Candidate answers; forwarded to ``search_summary.kwquery``
            only when no template matched.

    Raises:
        SystemExit: if the segmented question is exactly ``'q'``.
    """
    input_message = question

    # Over-long and blank questions get the canned reply and nothing else.
    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
        return
    if input_message.strip() == '':
        print(mybot.respond("无"))
        return

    # Segment the question (original author's note: strips punctuation).
    message = T.wordSegment(input_message)
    # The tagging result is never used; the call is kept in case it has
    # side effects.  NOTE(review): confirm and drop if it is pure.
    T.postag(input_message)

    if message == 'q':
        exit()

    response = mybot.respond(message)

    # No template produced anything: canned "no answer" reply.
    if response == "":
        ans = mybot.respond('找不到答案')
        print('Eric:' + ans)
        return

    # A plain template reply (no leading '#') is the final answer.
    if response[0] != '#':
        print('Eric:' + response)
        return

    # Responses starting with '#' are directives asking for a lookup.
    if "searchbaike" in response:
        # Directive layout used below: <tag>:<entity>:<attribute>
        res = response.split(':')
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        ans = baike.query(entity, attr)

        if isinstance(ans, list):
            # Encyclopedia hit: print it and stop.  Falling through would
            # print the same list again as "uncertain candidates".
            print('Eric:' + QAT.ptranswer(ans, False))
            return

        # Only byte strings need decoding; text strings have no usable
        # ``decode`` (Python 3) or would be re-encoded as ASCII (Python 2).
        text = ans.decode('utf-8') if isinstance(ans, bytes) else ans
        if u'::找不到' in text:
            # Encyclopedia miss: fall back to the general search summary.
            print("通用搜索")
            ans = search_summary.kwquery(input_message)
    elif "NoMatchingTemplate" in response:
        # No template matched at all: general search with the candidates.
        ans = search_summary.kwquery(input_message, answers)
    else:
        # Unknown directive: treat as "no answer" instead of failing on an
        # unbound ``ans`` below.
        ans = []

    if not ans:
        print('Eric:' + '找不到答案')
    elif len(ans) > 1:
        print("不确定候选答案")
        print('Eric: ')
        for candidate in ans:
            print(candidate)
    else:
        print('Eric:' + ans[0])


def __inner_job():
    """Process one captured question screen from start to finish.

    Steps, in order:
      1. Obtain screen data via ``analyze_current_screen_text`` and extract
         keywords with ``get_text_from_image``; return early if none.
      2. Parse the question and candidate answers and print them.
      3. If ``enable_chrome`` is set, publish the question through
         ``question_obj`` and press the space key.
      4. Count candidates with ``baidu_count`` and print them as a table,
         sorted by count in descending order.
      5. Print the chatbot's reply to the question.
      6. Print the elapsed time and call ``save_screen``.

    Relies on names defined outside this block (``data_directory``,
    ``image_compress_level``, ``timeout``, ``mybot``, ...).
    """
    start = time.time()
    text_binary = analyze_current_screen_text(
        directory=data_directory,
        compress_level=image_compress_level[0])
    keywords = get_text_from_image(image_data=text_binary)
    if not keywords:
        print("text not recognize")
        return

    true_flag, question, answers = parse_question_and_answer(keywords)
    print('-' * 72)
    print(question)
    print('-' * 72)
    print("\n".join(answers))

    # Notify the browser: update the shared value under its lock.
    if enable_chrome:
        with question_obj.get_lock():
            question_obj.value = question
            keyboard.press("space")

    search_question = pre_process_question(question)
    summary = baidu_count(search_question, answers, timeout=timeout)
    summary_li = sorted(
        summary.items(), key=operator.itemgetter(1), reverse=True)

    # Header row followed by one (option, count) row per candidate.
    data = [("选项", "同比")]
    data.extend(summary_li)
    table = AsciiTable(data)
    print(table.table)

    print("*" * 72)
    # Guard against an empty count result; indexing [0] / [-1] would raise.
    if summary_li:
        # The '**' marker flags which end of the ranking applies:
        # the top entry when ``true_flag`` is set, the bottom one otherwise.
        if true_flag:
            print("肯定回答(**): ", summary_li[0][0])
            print("否定回答( ): ", summary_li[-1][0])
        else:
            print("肯定回答( ): ", summary_li[0][0])
            print("否定回答(**): ", summary_li[-1][0])
    print("*" * 72)

    # Chatbot reply.  Kept in a helper so its early exits still reach the
    # timing output and the screenshot save below.
    _answer_with_bot(question, answers)

    end = time.time()
    print("use {0} 秒".format(end - start))
    save_screen(directory=data_directory)
def main():
    """Run the interactive console question loop.

    Initialises the tokenizer, loads the AIML rule files from the
    ``resources`` directory next to this module, prints a greeting and then
    repeatedly reads a question, segments it and prints the kernel's
    response together with its decoded length.  The loop ends only when the
    segmented input is exactly ``'q'``.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Switch to the corpus working directory.
    mybot_path = './'
    os.chdir(mybot_path)

    # Load the AIML rules.
    mybot = aiml.Kernel()
    # Rule sets not loaded at the moment: general, zextra_weibao, bye,
    # tools, bad, funny, OrdinaryQuestion, Common conversation.
    active_rule_files = (
        "std-startup.xml",
        "abc.aiml",
        "bot_profile.aiml",
        "infor.aiml",
        "main.aiml",
        "new07281.aiml",
        "salutations.aiml",
        "virus0727.aiml",
    )
    for rule_file in active_rule_files:
        mybot.learn(
            os.path.split(os.path.realpath(__file__))[0]
            + "/resources/" + rule_file)

    print('泰康小康:你好,我是小康。╭(╯^╰)╮')

    # Question-handling loop.
    while True:
        input_message = raw_input("您想问什么 >> ")

        # Skip questions longer than 60 characters and blank questions.
        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
            continue
        if input_message.strip() == '':
            print(mybot.respond("无"))
            continue

        # Pre-process the question with the Tools helpers.
        print(input_message)
        message = T.wordSegment(input_message)  # original note: strips punctuation
        print('word Seg:' + message)
        words = T.postag(input_message)

        # 'q' quits the program.
        if message == 'q':
            exit()

        # AIML template matching comes first.
        response = mybot.respond(message)
        print("=======")
        print(response)
        print(len(response.decode('utf8')))
        print("=======")
def find_ans(question=''):
    """Look up an answer for *question* and describe where it came from.

    Sources are tried in this order: the local knowledge base
    (``Client().qa_find_ans``), AIML templates (``mybot``), the
    encyclopedia (``baike.query``) and the search summary
    (``search_summary.kwquery``).  When the search returns several
    candidates the function may call itself with a rephrased question.

    Reads and writes the module-level ``raw`` flag.
    NOTE(review): its exact meaning is not visible here; it is only ever
    set to False by this function.

    Args:
        question: Question text.

    Returns:
        A ``(answer, log)`` tuple, where ``log`` is a newline-separated
        trace of the numbered steps that were taken.
    """
    global raw

    trace = '答案来源:'
    step = 0
    input_message = question

    # Over-long or blank questions get a canned reply right away.
    if len(input_message) > 60:
        return (mybot.respond("句子长度过长"), trace)
    if input_message.strip() == '':
        return (mybot.respond("无"), trace)

    # Stage 1: the local knowledge base (original note: code lives in the
    # QA1 folder).
    step += 1
    trace += str(step) + ':检索本地知识库\n'
    ans, kb_trace = Client().qa_find_ans(input_message)
    trace += kb_trace
    if ans != "不知道~":
        return (clean_str(ans), trace)
    trace += '本地知识库找不到答案或答案不确定\n'

    # Stage 2: AIML templates on the segmented question.
    response = mybot.respond(T.wordSegment(input_message))
    if response == "":
        return (mybot.respond('不知道~'), trace)

    # A reply without the leading '#' is a direct template answer.
    if response[0] != '#':
        step += 1
        trace += str(step) + ':匹配问句模板\n'
        return (clean_str(response), trace)

    # Stage 3: '#' directives ask for an encyclopedia or search lookup.
    step += 1
    trace += str(step) + ':匹配不到问句模板\n'

    if "searchbaike" in response:
        # Directive layout used below: <tag>:<entity>:<attribute>
        fields = response.split(':')
        entity = str(fields[1]).replace(" ", "")
        attr = str(fields[2]).replace(" ", "")
        step += 1
        trace += (str(step) + ':匹配到实体属性模板,' + '实体:' + entity
                  + ' 属性:' + attr + '\n')
        ans = baike.query(entity, attr)
        if type(ans) is list:
            step += 1
            trace += str(step) + ':来自百科Infobox\n'
            return (QAT.ptranswer(ans, False), trace)
        elif '-找不到' in ans:
            # Infobox miss: fall back to the search summary.
            step += 1
            trace += str(step) + ':百科Infobox查询不到:' + ans + '\n'
            step += 1
            trace += str(step) + '来自搜索\n'
            ans, search_trace = search_summary.kwquery(input_message)
            trace += search_trace
    elif "NoMatchingTemplate" in response:
        step += 1
        trace += str(step) + ':匹配不到实体关系模板\n'
        step += 1
        trace += str(step) + ':来自搜索\n'
        ans, search_trace = search_summary.kwquery(input_message)
        trace += search_trace

    # Stage 4: decide based on how many candidates there are.
    candidate_count = len(ans)
    if candidate_count == 0:
        step += 1
        trace += str(step) + ':未查询到答案\n'
        return (mybot.respond('不知道~'), trace)
    if candidate_count == 1:
        return (clean_str(ans[0]), trace)

    step += 1
    trace += str(step) + ':返回百度摘要\n'
    asks_definition = '什么是' in question or '是什么' in question
    # ``== False`` is deliberate: it keeps the original comparison exactly.
    if raw == False and asks_definition:
        result = "给你找到几篇新闻:" + "".join(item + '\n' for item in ans)
        return (result, trace)

    # Retry with both definition phrasings.  The "...是什么" form is queried
    # first, but the "什么是..." result wins unless it is only a news list.
    raw = False
    question = question.replace("是什么", "").replace("什么是", "")
    ans2, log2 = find_ans(question + "是什么")
    ans1, log1 = find_ans("什么是" + question)
    if "给你找到几篇新闻" in ans1:
        return (ans2, log2)
    return (ans1, log1)