Ejemplo n.º 1
0
def _summary_answer(mybot, query):
    """Answer *query* from the search-engine summaries.

    Returns the single candidate (with spaces and newlines removed) when
    ``search_summary.kwquery`` yields exactly one, the bot's "answer not
    found" reply when it yields none, and the literal not-found text when
    there are several competing candidates.
    """
    candidates = search_summary.kwquery(query)
    if len(candidates) == 0:
        return mybot.respond('找不到答案')
    if len(candidates) == 1:
        return candidates[0].strip().replace(' ', '').replace("\n", "")
    return '找不到答案'


def run(query):
    """Answer one user query and return the reply.

    A fresh AIML kernel is built on every call from the corpus files in the
    ``resources`` directory next to this file.

    Args:
        query: the raw user question.

    Returns:
        The reply: a fixed message for over-long input, an AIML reply for
        blank input or when nothing matches, whatever ``baike.query`` returns
        for a ``#...searchbaike`` directive it can answer, or the result of
        the summary-search fallback.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Switch to the corpus working directory ('./' leaves the cwd unchanged).
    mybot_path = './'
    os.chdir(mybot_path)

    base_dir = os.path.split(os.path.realpath(__file__))[0]
    mybot = aiml.Kernel()
    for corpus in ("std-startup.xml", "bye.aiml", "tools.aiml", "bad.aiml",
                   "funny.aiml", "OrdinaryQuestion.aiml",
                   "Common conversation.aiml"):
        mybot.learn(base_dir + "/resources/" + corpus)

    if len(query) > 60:
        return '句子长度过长'
    if query.strip() == '':
        return mybot.respond('无')

    message = T.wordSegment(query)
    response = mybot.respond(message)
    if response == '':
        return mybot.respond('找不到答案')

    if response[0] == '#':
        # A leading '#' marks a directive rather than a literal reply.
        if "searchbaike" not in response:
            return '找不到答案'
        res = response.split(':')
        if len(res) < 3:
            # Malformed directive: entity and attribute fields are missing.
            return '找不到答案'
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        ans = baike.query(entity, attr)
        if '找不到' not in ans:
            return ans
        return _summary_answer(mybot, query)

    # NOTE(review): a matched (non-directive) template reply is discarded in
    # favour of the summary search, as in the original -- confirm intended.
    return _summary_answer(mybot, query)
Ejemplo n.º 2
0
def qa():
    """Run the interactive console question-answering loop.

    Loads the AIML corpus from the ``resources`` directory next to this
    file, prints a banner, then repeatedly reads a line from the console and
    prints the bot's reply. Entering a message that segments to ``q`` exits
    the process. The function never returns normally.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    base_dir = os.path.split(os.path.realpath(__file__))[0]
    mybot = aiml.Kernel()
    for corpus in ("std-startup.xml", "Common conversation.aiml", "bye.aiml",
                   "tools.aiml", "bad.aiml", "funny.aiml",
                   "OrdinaryQuestion.aiml"):
        mybot.learn(base_dir + "/resources/" + corpus)

    print('''
.----------------.  .-----------------. .----------------.  .----------------.  .----------------.
| .--------------. || .--------------. || .--------------. || .--------------. || .--------------. |
| |    _______   | || | ____  _____  | || |      __      | || |  ___  ____   | || |  _________   | |
| |   /  ___  |  | || ||_   \|_   _| | || |     /  \     | || | |_  ||_  _|  | || | |_   ___  |  | |
| |  |  (__ \_|  | || |  |   \ | |   | || |    / /\ \    | || |   | |_/ /    | || |   | |_  \_|  | |
| |   '.___`-.   | || |  | |\ \| |   | || |   / /__\ \   | || |   |  __'.    | || |   |  _|  _   | |
| |  |`\____) |  | || | _| |_\   |_  | || | _/ /    \ \_ | || |  _| |  \ \_  | || |  _| |___/ |  | |
| |  |_______.'  | || ||_____|\____| | || ||____|  |____|| || | |____||____| | || | |_________|  | |
| |              | || |              | || |              | || |              | || |              | |
| '--------------' || '--------------' || '--------------' || '--------------' || '--------------' |
 '----------------'  '----------------'  '----------------'  '----------------'  '----------------'
 Eric:你好,我是Eric。╭(╯^╰)╮
    ''')

    while True:
        input_message = raw_input("Enter your message >> ")

        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
            continue
        elif input_message.strip() == '':
            print(mybot.respond("无"))
            continue

        print(input_message)
        # Segment the input (the helper also strips punctuation, per the
        # original comment).
        message = T.wordSegment(input_message)
        print('word Seg:' + message)
        print('词性:')

        if message == 'q':
            exit()

        response = mybot.respond(message)

        print("=======")
        print(response)
        print("=======")

        if response == "":
            print('Eric:' + mybot.respond('找不到答案'))
        elif response[0] == '#':
            # A leading '#' marks a directive rather than a literal reply.
            # Start from "no candidates" so an unrecognised directive yields
            # the not-found reply instead of a NameError.
            ans = []
            if "searchbaike" in response:
                # Encyclopedia lookup: the directive carries entity:attribute.
                print("searchbaike")
                print(response)
                res = response.split(':')
                entity = str(res[1]).replace(" ", "")
                attr = str(res[2]).replace(" ", "")
                print(entity + '<---->' + attr)

                ans = baike.query(entity, attr)
                if isinstance(ans, list):
                    # Direct hit.
                    print('Eric:' + QAT.ptranswer(ans, False))
                    continue
                elif u'::找不到' in ans.decode('utf-8'):
                    # Not found: fall back to the Baidu/Bing summary search.
                    print("通用搜索")
                    ans = search_summary.kwquery(input_message)
                # NOTE(review): any other string result is treated below as
                # a sequence of candidates, as in the original -- confirm.

            elif "NoMatchingTemplate" in response:
                # No template matched: general summary search.
                print("NoMatchingTemplate")
                ans = search_summary.kwquery(input_message)

            if len(ans) == 0:
                print('Eric:' + mybot.respond('找不到答案'))
            elif len(ans) > 1:
                # Several candidates: list them all.
                print("不确定候选答案")
                print('Eric: ')
                for a in ans:
                    print(a.encode("utf8"))
            else:
                print('Eric:' + ans[0].encode("utf8"))

        else:
            # A template matched: print its reply verbatim.
            print('Eric:' + response)
Ejemplo n.º 3
0
def QA(input_message, mybot):
    findAns = False
    reply = ''
    ansdict = {}
    dbname = 'zwgx'  #数据库名
    dbip = 'localhost'  #数据库IPlocalhost
    dbport = 3306  #数据库端口
    dbusername = '******'  #数据库用户名
    dbpassword = '******'  #数据库密码root
    schoolname = ''
    intention = ''
    if len(input_message) > 60:
        reply = mybot.respond("句子长度过长")
        findAns = True
    elif input_message.strip() == '无':
        reply = mybot.respond("无")
        findAns = True

    if (findAns == False):
        #print input_message
        #传入一个b''未加工的对象
        message = T.wordSegment(input_message)
        # 分词去标点
        if message == 'q':
            exit()
        else:
            print 'word Seg:' + message
            print utf_to_bgk('词性:')
            words = T.postag(input_message)  #词性标注 格式 词/词性
            for w in words:
                print w.word, w.flag
                if w.flag == 'school':
                    try:
                        #先用自定义的分词处理得到对应的词性,然后根据词性到数据库查询
                        db = pymysql.connect(host=dbip,
                                             user=dbusername,
                                             passwd=dbpassword,
                                             db=dbname,
                                             charset="utf8")
                        cursor = db.cursor()
                        sql = u"SELECT `学校名` FROM 学校简称 WHERE `简称`='" + w.word + "'"
                        # 执行SQL语句
                        cursor.execute(sql)
                        # 获取所有记录列表,已验证数据库查询跑通
                        results = cursor.fetchall()
                        #print results
                        #替换简称
                        if len(results) > 0:
                            input_message = input_message.replace(
                                w.word, results[0][0]).__str__()
                            w.flag = 'nt'
                            w.word = results[0][0]
                        print utf_to_bgk(input_message), utf_to_bgk(
                            w.word), utf_to_bgk(w.flag)
                        # 关闭数据库连接
                        db.close()
                    except Exception as e:
                        print(e)
                # 识别学校简称并配对数据库中已存内容
                if w.flag == 'x' or w.flag == 'nt':
                    try:
                        db = pymysql.connect(host=dbip,
                                             user=dbusername,
                                             passwd=dbpassword,
                                             db=dbname,
                                             charset="utf8")
                        cursor = db.cursor()
                        sql = u"SELECT `属性`,`内容` FROM school WHERE `学校`='" + w.word + "'"
                        # 执行SQL语句
                        cursor.execute(sql)

                        # 获取所有记录列表
                        results = cursor.fetchall()
                        #print u'flag转化之后的查询',results
                        if len(results) > 0:
                            for row in results:
                                ansdict[row[0]] = row[1]
                                #print row[0],row[1]
                                # reply +=row[0].encode("utf8")
                                # reply+=" ".encode("utf8")
                            # shuxing=raw_input('Frank:你想了解什么属性 ' + reply+">>")
                            # sql = u"SELECT `内容` FROM school WHERE `学校`='" + w.word + u"'AND `属性`='"+shuxing+"'"
                            # cursor.execute(sql)
                            # results = cursor.fetchall()
                            # if len(results)>0:
                            #     print "Frank: "+results[0][0].encode("utf8")
                            #     reply=results[0][0].encode("utf8")
                            #     return reply
                        # 关闭数据库连接
                        #print u'查询之后的结果储存',ansdict 将数据库中的所有信息写入到ansdict中
                        db.close()
                    except Exception as e:
                        print(e)
                #todo: 每个词去找查数据库可以优化一下 加一下词性判断
                #获得学校的名称
                if FindSchool(dbip, dbusername, dbpassword, dbname,
                              w.word) != "":
                    schoolname = FindSchool(dbip, dbusername, dbpassword,
                                            dbname, w.word)

            uni = input_message.strip().decode('utf-8')
            print u'查看返回值', uni, utf_to_bgk(input_message.strip())
            response = mybot.respond(
                input_message.strip())  #如果未给传入参数转化为utf8则报错

            print "======="
            #print response
            print "=======+"

            if response == "":
                reply = mybot.respond('找不到答案')
                findAns = True
                print 'Frank1:' + utf_to_bgk(reply)


# *********************************************************************************
# 百科搜索  aiml机器人没有没有
            elif response[0] == '#':
                # 匹配百科
                # if response.__contains__("searchbaike"):
                #     print "searchbaike"
                #     print response
                #     res = response.split(':')
                #     # 实体
                #     entity = str(res[1]).replace(" ", "")
                #     # 属性
                #     attr = str(res[2]).replace(" ", "")
                #     print entity + '<---->' + attr
                #
                #     ans = baike.query(entity, attr)
                #     # 如果命中答案
                #     if type(ans) == list:
                #         print 'Frank:' + QAT.ptranswer(ans, False)
                #         reply = QAT.ptranswer(ans, False)
                #         findAns = True
                #     elif ans.decode('utf-8').__contains__(u'::找不到'):
                #         # 百度摘要+Bing摘要
                #         print "通用搜索"
                #         ans = search_summary.kwquery(input_message)
                #
                # # 匹配不到模版,通用查询
                # elif response.__contains__("NoMatchingTemplate"):
                #     print "NoMatchingTemplate"
                #
                #当复杂问题时,通过分类器模型进行分类再查询
                if (schoolname != ""):
                    sock = socket(AF_INET, SOCK_STREAM)
                    sock.connect(('127.0.0.1', 50009))
                    sock.sendall(input_message.encode("utf-8"))
                    intention = sock.recv(1024)
                    sock.close()
                    print utf_to_bgk(intention), u'经过分类器处理后的结果'

                #经过dl识别后分类问题,如果问题在数据库中,即把问题分类为数据库的一个属性,再调用属性值,可以增加数据库的属性分类和值
                if unicode(intention) in ansdict:
                    reply = ansdict[unicode(intention)]
                    #print 'Frank:' + reply.encode("utf8")
                    #print 'Frank2:' + utf_to_bgk(reply)

                #如果问题没有在数据库预存储
                else:
                    TempDict = search_summary.kwquery(input_message, intention,
                                                      schoolname)
                    ansdict['schoolname'] = TempDict['schoolname']
                    ansdict['intention'] = TempDict['intention']
                    ansdict['index'] = TempDict['index']
                    ans = TempDict['answer']
                    if (findAns == False):
                        if len(ans) == 0:
                            ans = mybot.respond('找不到答案')
                            #print 'Frank3:' + utf_to_bgk(ans)
                            reply = ans
                            findAns = True
                        elif len(ans) > 1:
                            print u"不确定候选答案"
                            print 'Frank4: '
                            for a in ans:
                                print a.encode("utf8")
                                reply += a.encode("utf8") + '\n'
                            findAns = True
                        else:
                            #print 'Frank5:' + ans[0].encode("utf8")
                            reply = ans[0].encode("utf8")
                            findAns = True

            # 匹配模版
            else:
                print 'Frank6:' + utf_to_bgk(response)
                reply = response
                findAns = True

    ansdict['baidu'] = reply
    json_s = json.dumps(ansdict)
    return json_s
Ejemplo n.º 4
0
    while True:
        input_message = raw_input("Enter your message >> ")

        if len(input_message) > 60:
            print mybot.respond("句子长度过长")
            continue
        elif input_message.strip() == '':
            print mybot.respond("无")
            continue

        print input_message
        message = T.wordSegment(input_message)
        # 去标点
        print 'word Seg:' + message
        print '词性:'
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            print "======="
            print response
            print "======="

            if response == "":
                ans = mybot.respond('找不到答案')
                print 'Eric:' + ans
            # 百科搜索
            elif response[0] == '#':
Ejemplo n.º 5
0
def ws():
    user_socket = request.environ.get('wsgi.websocket')  # type:WebSocket
    while 1:
        msg =user_socket.receive()
        question = json.loads(msg)
        q = question['data']['mine']['content']
        msg =q
        input_message = str(msg).encode('utf-8')
        if len(input_message) > 60:
            answer =  mybot.respond("句子长度过长")
            # continue
        elif input_message.strip() == '':
            answer = mybot.respond("无话可说")
            # continue

        # print input_message
        message = T.wordSegment(input_message)
        # 去标点
        # print 'word Seg:'+ message
        # print '词性:'
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)  # 在AIML数据集里寻找答案

            print "======="
            if response[0] == '#':
                print response + 'mark'

            else:
               answer =  response

            print "======="

            if response == "":
                ans = mybot.respond('找不到答案')
                answer = ans
            # 百科搜索
            elif response[0] == '#' or len(response) < 1:
                # 匹配百科
                if response.__contains__("searchbaike"):
                    print "searchbaike"
                    print response
                    res = response.split(':')
                    # 实体
                    entity = str(res[1]).replace(" ", "")
                    # 属性
                    attr = str(res[2]).replace(" ", "")
                    print entity + '<---->' + attr

                    ans = baike.query(entity, attr)

                    # 如果命中答案
                    print type(ans)
                    if type(ans) == list:
                        answer = '回答:' + QAT.ptranswer(ans, False)
                        # continue
                    elif ans.decode('utf-8').__contains__(u'::找不到'):
                        # 百度摘要+Bing摘要
                        print "通用搜索"
                        answer = search_summary.kwquery(input_message)

                # 匹配不到模版,通用查询
                elif response.__contains__("NoMatchingTemplate"):
                    print "NoMatchingTemplate"
                    ans = search_summary.kwquery(input_message)
                    print type(ans)

                if len(ans) == 0:
                    ans = mybot.respond('找不到答案')
                    answer = '回答:' + ans
                elif len(ans) > 1:
                    print "不确定候选答案"
                    answer = ans[0]
                    print 'Eric: '
                    for a in ans:
                        print a.encode("utf-8")
                else:
                    answer = '回答:' + ans[0].encode("utf-8")



            # 匹配模版
            else:
                answer = '回答:' + response

        s = '展开全部'
        print type(answer).__name__

        if (type(answer).__name__ == 'list' and len(answer)>0) or '唔... 怎么回答...'in answer or '天气' in msg:
            answer = geta(msg)
            answer = answer
            print 'wocao'
        else:
            #print answer
            if s in str(answer):
                print answer
                answer = str(answer).replace('\n', '').replace('展开全部', "").split('已赞过')[0]
                print  'OS' +answer
        print user_socket,msg

        res =answer
        a = {
            "username": "******",
            "avatar": "https://robot.rszhang.top/images/icon/nv/0.jpg",
            "id": "-2",  # //消息的来源ID(如果是私聊,则是用户id,如果是群聊,则是群组id)
            "type": "friend",  # //聊天窗口来源类型,从发送消息传递的to里面获取
            "content": res,  # //消息内容
            "cid": 0,  # //消息id,可不传。除非你要对消息进行一些操作(如撤回)
            "mine": True,  # //是否我发送的消息,如果为true,则会显示在右方
            "fromid": "100000",  # /消息的发送者id(比如群组中的某个消息发送者),可用于自动解决浏览器多窗口时的一些问题
            "timestamp": 1467475443306,  # //服务端时间戳毫秒数。注意:如果你返回的是标准的 unix 时间戳,记得要 *1000
        }

        user_socket.send(json.dumps(a))
Ejemplo n.º 6
0
def _rank_person_names(text, keywords):
    """Return up to three person names that occur most often in *text*.

    *text* is split into sentences, only sentences containing one of
    *keywords* are kept, words tagged ``nr`` in them are counted, and the
    most frequent ones that are not themselves keywords are returned.
    """
    # Split into sentences on these delimiters; text after the final
    # delimiter is dropped, as in the original.
    cutlist = [u"。", u"?", u".", u"_", u"-", u":", u"!", u"?"]
    sentences = []
    temp = ''
    for ch in text:
        if ch in cutlist:
            if temp != '':
                sentences.append(temp)
                temp = ''
        else:
            temp += ch

    # Keep only the sentences that mention a keyword.
    key_sentences = {}
    for s in sentences:
        for k in keywords:
            if k in s:
                key_sentences[s] = 1

    # Count the words tagged 'nr' in those sentences.
    target_list = {}
    for ks in key_sentences:
        for w in T.postag(ks):
            if w.flag == "nr":
                target_list[w.word] = target_list.get(w.word, 0) + 1

    # Highest frequency first, minus words that came from the question.
    ranked = sorted(target_list.items(),
                    key=lambda item: item[1],
                    reverse=True)
    candidates = [st for st in ranked if st[0] not in keywords]

    print("返回前n个词频")
    return [st[0] for st in candidates[:3]]


def kwquery(query):
    """Search Baidu and Bing for *query* and return a list of answers.

    Structured answers on the first Baidu result are tried first, then
    Bing's knowledge box or its dictionary entries. If none yields an
    answer, the collected result text is mined for frequent person names.
    Page nodes that are missing are skipped instead of raising.

    Args:
        query: the user question.

    Returns:
        A list with one structured answer, or up to three candidate names
        (possibly empty).
    """
    # Keywords are the words whose POS flag contains "n".
    keywords = [k.word for k in T.postag(query) if "n" in k.flag]

    answer = []
    text = ''

    # Walk Baidu result blocks with ids 1-9, collecting their text.
    soup_baidu = To.get_html_baidu('https://www.baidu.com/s?wd=' +
                                   quote(query))

    for i in range(1, 10):
        if soup_baidu is None:
            break
        results = soup_baidu.find(id=i)

        if results is None:
            print("百度摘要找不到答案")
            break

        # Structured answers are only looked for on the first result, and
        # only when it carries a 'mu' attribute.
        if i == 1 and 'mu' in results.attrs:
            # Knowledge-graph exact answer.
            r = results.find(class_='op_exactqa_s_answer')
            if r is None:
                print("百度知识图谱找不到答案")
            else:
                print("百度知识图谱找到答案")
                answer.append(r.get_text().strip())
                return answer

            # Classical-poetry answer.
            r = results.find(class_="op_exactqa_detail_s_answer")
            if r is None:
                print("百度诗词找不到答案")
            else:
                print("百度诗词找到答案")
                answer.append(r.get_text().strip())
                return answer

            # Calculator widget.
            if ('http://open.baidu.com/static/calculator/calculator.html'
                    in results.attrs['mu']):
                try:
                    r = results.find('div').find_all('td')[1].find_all(
                        'div')[1]
                except (AttributeError, IndexError):
                    # The widget markup is not in the expected layout.
                    r = None
                if r is None:
                    print("计算器找不到答案")
                else:
                    print("计算器找到答案")
                    answer.append(r.get_text().strip())
                    return answer

            # Baidu Zhidao "best answer" link.
            r = results.find(class_='op_best_answer_question_link')
            if r is None:
                print("百度知道图谱找不到答案")
            else:
                print("百度知道图谱找到答案")
                zhidao_soup = To.get_html_zhidao(r['href'])
                box = None
                if zhidao_soup is not None:
                    box = zhidao_soup.find(class_='bd answer')
                pre = box.find('pre') if box is not None else None
                if pre is not None:
                    answer.append(pre.get_text())
                    return answer

        if i == 1:
            h3 = results.find("h3")
            link = h3.find("a") if h3 is not None else None
            if link is not None:
                title = link.get_text()

                # First result is a Baidu Zhidao page.
                if u"百度知道" in title:
                    url = link.get('href')
                    if url is None:
                        print("百度知道图谱找不到答案")
                        continue
                    print("百度知道图谱找到答案")
                    zhidao_soup = To.get_html_zhidao(url)
                    r = None
                    if zhidao_soup is not None:
                        r = zhidao_soup.find(class_='bd answer')
                    if r is None:
                        continue
                    r = r.find('pre')
                    if r is None:
                        continue
                    answer.append(r.get_text().strip())
                    return answer

                # First result is a Baidu Baike page.
                if u"百度百科" in title:
                    url = link.get('href')
                    if url is None:
                        print("百度百科找不到答案")
                        continue
                    print("百度百科找到答案")
                    baike_soup = To.get_html_baike(url)
                    r = None
                    if baike_soup is not None:
                        r = baike_soup.find(class_='lemma-summary')
                    if r is None:
                        continue
                    answer.append(r.get_text().replace("\n", "").strip())
                    return answer

        text += results.get_text()

    # Bing: knowledge box first, otherwise dictionary entries in the results.
    soup_bing = To.get_html_bing('https://www.bing.com/search?q=' +
                                 quote(query))
    if soup_bing is not None:
        bingbaike = soup_bing.find(class_="bm_box")

        if bingbaike is not None:
            v_lists = bingbaike.find_all(class_="b_vList")
            # The box counts as an answer only when its second list has
            # items.
            if len(v_lists) > 1 and v_lists[1].find("li") is not None:
                print("Bing知识图谱找到答案")
                answer.append(bingbaike.get_text())
                return answer
        else:
            print("Bing知识图谱找不到答案")
            results = soup_bing.find(id="b_results")
            bing_list = results.find_all('li') if results is not None else []
            for bl in bing_list:
                temp = bl.get_text()
                if u" - 必应网典" in temp:
                    print("查找Bing网典")
                    h2 = bl.find("h2")
                    link = h2.find("a") if h2 is not None else None
                    url = link.get('href') if link is not None else None
                    if url is None:
                        print("Bing网典找不到答案")
                        continue
                    print("Bing网典找到答案")
                    bingwd_soup = To.get_html_bingwd(url)
                    card = None
                    if bingwd_soup is not None:
                        card = bingwd_soup.find(class_='bk_card_desc')
                    r = card.find("p") if card is not None else None
                    if r is None:
                        continue
                    answer.append(r.get_text().replace("\n", "").strip())
                    return answer

            if results is not None:
                text += results.get_text()

    # Neither engine gave a structured answer: mine the collected text.
    return _rank_person_names(text, keywords)
Ejemplo n.º 7
0
def _kw_zhidao_answer(url):
    """Return the answer text of a Baidu Zhidao page, or None if it has none.

    The answer is read from a ``<pre>`` element inside the ``bd answer``
    container, falling back to that container's ``line content`` element.
    """
    zhidao_soup = To.get_html_zhidao(url)
    if zhidao_soup is None:
        return None
    box = zhidao_soup.find(class_='bd answer')
    if box is None:
        return None
    node = box.find('pre')
    if node is None:
        node = box.find(class_='line content')
    if node is None:
        return None
    return node.get_text()


def _kw_baidu_card_answer(results):
    """Return a direct answer from Baidu's first result card, or None.

    The checks run in a fixed order: exact answer, poetry answer, calendar,
    new-style calendar, calculator, then the best-answer link to Zhidao.
    """
    attrs = results.attrs
    has_mu = 'mu' in attrs
    mu = attrs['mu'] if has_mu else ''

    # Knowledge-graph exact answer.
    if has_mu:
        node = results.find(class_='op_exactqa_s_answer')
        if node is None:
            print("百度知识图谱找不到答案")
        else:
            print("百度知识图谱找到答案")
            return node.get_text().strip()

    # Classical poetry answer.
    if has_mu:
        node = results.find(class_="op_exactqa_detail_s_answer")
        if node is None:
            print("百度诗词找不到答案")
        else:
            print("百度诗词找到答案")
            return node.get_text().strip()

    # Perpetual calendar / date card.
    if has_mu and 'http://open.baidu.com/calendar' in mu:
        node = results.find(class_="op-calendar-content")
        if node is None:
            print("百度万年历找不到答案")
        else:
            print("百度万年历找到答案")
            return node.get_text().strip().replace("\n", "").replace(" ", "")

    # New-style calendar card: the answer is carried in the ``fk`` attribute.
    if 'tpl' in attrs and 'calendar_new' in attrs['tpl']:
        fk = attrs.get('fk')
        if fk is None:
            print("百度万年历新版找不到答案")
        else:
            value = fk.replace("6018_", "")
            print(value)
            print("百度万年历新版找到答案")
            return value

    # Calculator card: second <div> of the second <td> of the first <div>.
    if has_mu and 'http://open.baidu.com/static/calculator/calculator.html' in mu:
        try:
            node = results.find('div').find_all('td')[1].find_all('div')[1]
        except (AttributeError, IndexError):
            # The card does not have the expected table layout.
            node = None
        if node is None:
            print("计算器找不到答案")
        else:
            print("计算器找到答案")
            return node.get_text().strip()

    # Best-answer link pointing at a Baidu Zhidao page.
    if has_mu:
        link = results.find(class_='op_best_answer_question_link')
        if link is None:
            print("百度知道图谱找不到答案")
        else:
            print("百度知道图谱找到答案")
            url = link.get('href')
            text = _kw_zhidao_answer(url) if url is not None else None
            if text is not None:
                return text

    return None


def _kw_baidu_linked_answer(results):
    """Return an answer from the Zhidao/Baike page a result links to, or None.

    The page type is decided from the text of the result's ``<h3><a>`` title.
    """
    heading = results.find("h3")
    if heading is None:
        return None
    link = heading.find("a")
    if link is None:
        return None
    title = link.get_text()
    url = link.get('href')

    # Baidu Zhidao (Q&A) result.
    if u"百度知道" in title:
        if url is None:
            print("百度知道图谱找不到答案")
            return None
        print("百度知道图谱找到答案")
        text = _kw_zhidao_answer(url)
        return text.strip() if text is not None else None

    # Baidu Baike (encyclopedia) result.
    if u"百度百科" in title:
        if url is None:
            print("百度百科找不到答案")
            return None
        print("百度百科找到答案")
        baike_soup = To.get_html_baike(url)
        summary = None
        if baike_soup is not None:
            summary = baike_soup.find(class_='lemma-summary')
        if summary is None:
            return None
        return summary.get_text().replace("\n", "").strip()

    return None


def _kw_search_baidu(query):
    """Scan Baidu results for a direct answer.

    Returns:
        An ``(answer, text)`` tuple. ``answer`` is the answer string, or None
        when nothing matched; ``text`` is the concatenated text of the
        results scanned without success.
    """
    text = ''
    soup_baidu = To.get_html_baidu('https://www.baidu.com/s?wd=' + quote(query))
    if soup_baidu is None:
        return None, text

    # Result containers are looked up by numeric id, 1 through 9.
    for i in range(1, 10):
        results = soup_baidu.find(id=i)
        if results is None:
            print("百度摘要找不到答案")
            break
        # Answer cards are only recognised on the first result.
        if i == 1:
            found = _kw_baidu_card_answer(results)
            if found is not None:
                return found, text
        # Linked Zhidao/Baike pages are only followed for the first two.
        if i == 1 or i == 2:
            found = _kw_baidu_linked_answer(results)
            if found is not None:
                return found, text
        # A result that yielded no answer still contributes its snippet text.
        text += results.get_text()
    return None, text


def _kw_search_bing(query):
    """Scan Bing's knowledge panel and result list for a direct answer.

    Returns:
        An ``(answer, text)`` tuple shaped like the one from
        ``_kw_search_baidu``; ``text`` is the text of the ``b_results``
        container when no answer was found.
    """
    soup_bing = To.get_html_bing('https://www.bing.com/search?q=' + quote(query))
    if soup_bing is None:
        return None, ''

    panel = soup_bing.find(class_="bm_box")
    if panel is not None:
        fact_lists = panel.find_all(class_="b_vList")
        # The panel counts as an answer only if its second list has an item.
        if len(fact_lists) > 1 and fact_lists[1].find("li") is not None:
            print("Bing知识图谱找到答案")
            return panel.get_text(), ''
    print("Bing知识图谱找不到答案")

    results = soup_bing.find(id="b_results")
    if results is None:
        return None, ''
    for item in results.find_all('li'):
        if u" - 必应网典" not in item.get_text():
            continue
        print("查找Bing网典")
        heading = item.find("h2")
        link = heading.find("a") if heading is not None else None
        url = link.get('href') if link is not None else None
        if url is None:
            print("Bing网典找不到答案")
            continue
        print("Bing网典找到答案")
        bingwd_soup = To.get_html_bingwd(url)
        card = None
        if bingwd_soup is not None:
            card = bingwd_soup.find(class_='bk_card_desc')
        paragraph = card.find("p") if card is not None else None
        if paragraph is None:
            continue
        return paragraph.get_text().replace("\n", "").strip(), ''
    return None, results.get_text()


def _kw_rank_person_names(text, keywords):
    """Return up to three person names from *text*, most frequent first.

    Only sentences containing at least one of *keywords* are tagged, only
    tokens flagged ``nr`` are counted, and names that are themselves
    keywords of the question are dropped.
    """
    # Split into sentences. Text after the last separator is not kept.
    cutlist = (u"。", u"?", u".", u"_", u"-", u":", u"!", u"?")
    sentences = []
    current = ''
    for ch in text:
        if ch in cutlist:
            if current != '':
                sentences.append(current)
            current = ''
        else:
            current += ch

    # Keep each distinct sentence that mentions a keyword.
    key_sentences = {}
    for sentence in sentences:
        if any(k in sentence for k in keywords):
            key_sentences[sentence] = 1

    # Count tokens tagged as person names.
    name_counts = {}
    for sentence in key_sentences:
        for w in T.postag(sentence):
            if w.flag == "nr":
                name_counts[w.word] = name_counts.get(w.word, 0) + 1

    ranked = sorted(name_counts.items(), key=lambda item: item[1], reverse=True)
    candidates = [name for name, _count in ranked if name not in keywords]

    print("返回前n个词频")
    return candidates[:3]


def kwquery(query):
    """Answer *query* from Baidu and Bing search results.

    Direct answers are tried first (Baidu answer cards and linked
    Zhidao/Baike pages, then Bing's knowledge panel and linked pages). If
    none is found, the collected result text is mined for the most
    frequently mentioned person names.

    Args:
        query: The question text.

    Returns:
        A list holding one direct answer, or up to three candidate names
        (possibly empty) when only the summary analysis could be used.
    """
    # Keywords are the tokens whose part-of-speech flag contains "n" (nouns).
    keywords = [k.word for k in T.postag(query) if "n" in k.flag]

    found, text = _kw_search_baidu(query)
    if found is not None:
        return [found]

    found, bing_text = _kw_search_bing(query)
    if found is not None:
        return [found]

    # Neither engine produced a direct answer: analyse the collected text.
    return _kw_rank_person_names(text + bing_text, keywords)
Ejemplo n.º 8
0
def _qa_fetch_all(host, port, user, passwd, db, sql, params):
    """Run one parameterized query and return every row.

    The connection is closed even when the query raises; the exception is
    left for the caller to handle.
    """
    conn = pymysql.connect(host=host,
                           port=port,
                           user=user,
                           passwd=passwd,
                           db=db,
                           charset="utf8")
    try:
        cursor = conn.cursor()
        # Values are passed separately so the driver escapes them.
        cursor.execute(sql, params)
        return cursor.fetchall()
    finally:
        conn.close()


def QA(input_message, mybot):
    """Answer one message and return the result as a JSON string.

    Each token of the message is looked up in the school database: tokens
    flagged ``school`` are expanded from an abbreviation to the full school
    name, and tokens flagged ``x`` or ``nt`` load that school's
    attribute/content pairs into the result dict. The (possibly rewritten)
    message is then given to the AIML bot; a reply starting with ``#`` is
    resolved through the intention service on 127.0.0.1:50009 and
    ``search_summary.kwquery``.

    Args:
        input_message: The user's message.
        mybot: Bot object exposing ``respond(text)``.

    Returns:
        ``json.dumps`` of a dict whose ``'baidu'`` key holds the reply text,
        alongside any school attributes and search metadata collected.
    """
    reply = ''
    ansdict = {}
    dbname = 'zwgx'  # database name
    dbip = '106.14.124.221'  # database host
    dbport = 3306  # database port
    dbusername = '******'  # database user
    dbpassword = '******'  # database password
    schoolname = ''
    intention = ''

    if len(input_message) > 60:
        reply = mybot.respond("句子长度过长")
    elif input_message.strip() == '无':
        reply = mybot.respond("无")
    else:
        # Word segmentation (the original note says punctuation is removed).
        message = T.wordSegment(input_message)
        if message == 'q':
            exit()

        print('word Seg:' + message)
        print('词性:')
        for w in T.postag(input_message):
            print('%s %s' % (w.word, w.flag))

            # Expand a school abbreviation to the stored full name.
            if w.flag == 'school':
                try:
                    rows = _qa_fetch_all(
                        dbip, dbport, dbusername, dbpassword, dbname,
                        u"SELECT `学校名` FROM 学校简称 WHERE `简称`=%s",
                        (w.word,))
                    if len(rows) > 0:
                        full_name = rows[0][0]
                        input_message = str(
                            input_message.replace(w.word, full_name))
                        w.flag = 'nt'
                        w.word = full_name
                except Exception as e:
                    # Best effort: a failed lookup must not abort the answer.
                    print(e)

            # Load every stored attribute of a recognised school.
            if w.flag == 'x' or w.flag == 'nt':
                try:
                    rows = _qa_fetch_all(
                        dbip, dbport, dbusername, dbpassword, dbname,
                        u"SELECT `属性`,`内容` FROM school WHERE `学校`=%s",
                        (w.word,))
                    for row in rows:
                        ansdict[row[0]] = row[1]
                except Exception as e:
                    print(e)

            # TODO: querying the database for every token could be narrowed
            # with a part-of-speech check.
            found_school = FindSchool(dbip, dbusername, dbpassword, dbname,
                                      w.word)
            if found_school != "":
                schoolname = found_school

        response = mybot.respond(input_message.strip())

        print("=======")
        print(response)
        print("=======")

        if response == "":
            reply = mybot.respond('找不到答案')
            print('Frank:' + reply)
        elif response[0] == '#':
            # Ask the local intention service only when a school was found.
            if schoolname != "":
                sock = socket(AF_INET, SOCK_STREAM)
                try:
                    sock.connect(('127.0.0.1', 50009))
                    sock.sendall(input_message.encode("utf-8"))
                    intention = sock.recv(1024)
                finally:
                    sock.close()
                print(intention)

            intention_key = unicode(intention)
            if intention_key in ansdict:
                # The database already holds the requested attribute.
                reply = ansdict[intention_key]
                print('Frank:' + reply.encode("utf8"))
            else:
                TempDict = search_summary.kwquery(input_message, intention,
                                                  schoolname)
                ansdict['schoolname'] = TempDict['schoolname']
                ansdict['intention'] = TempDict['intention']
                ansdict['index'] = TempDict['index']
                ans = TempDict['answer']
                if len(ans) == 0:
                    reply = mybot.respond('找不到答案')
                    print('Frank:' + reply)
                elif len(ans) > 1:
                    print("不确定候选答案")
                    print('Frank: ')
                    lines = []
                    for a in ans:
                        print(a.encode("utf8"))
                        lines.append(a.encode("utf8") + '\n')
                    reply = ''.join(lines)
                else:
                    print('Frank:' + ans[0].encode("utf8"))
                    reply = ans[0].encode("utf8")
        else:
            # The bot matched a template directly.
            print('Frank:' + response)
            reply = response

    ansdict['baidu'] = reply
    json_s = json.dumps(ansdict)
    return json_s
Ejemplo n.º 9
0
    # Interactive loop: read a message from standard input and answer it.
    # NOTE(review): this excerpt has no enclosing definition and stops partway
    # through the response-handling chain.
    while True:
        input_message = raw_input("Enter your message >> ")

        # Over-long input and blank input each get a canned bot reply and
        # skip the rest of the iteration.
        if len(input_message) > 60:
            print mybot.respond("句子长度过长")
            continue
        elif input_message.strip() == '':
            print mybot.respond("无")
            continue

        print input_message
        message = T.wordSegment(input_message)
        # Strip punctuation
        print 'word Seg:'+ message
        print '词性:'
        words = T.postag(input_message)


        # A segmented message equal to 'q' terminates the process.
        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            print "======="
            print response
            print "======="

            # An empty bot reply falls back to the "answer not found" reply.
            if response == "":
                ans = mybot.respond('找不到答案')
                print 'Eric:' + ans
            # Encyclopedia search
Ejemplo n.º 10
0
def run(question):
    """Answer one question with the AIML bot, falling back to web search.

    The bot brain is loaded from ``bot_brain.brn`` when that file exists;
    otherwise the AIML resources next to this module are learned and the
    brain is saved for the next call.

    Args:
        question: The user's question.

    Returns:
        The bot's own text for rejected input and for template matches; a
        string prefixed with ``回答:`` for encyclopedia/search answers and
        for the "answer not found" fallback; or the raw candidate collection
        when the search returns more than one candidate.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Switch to the corpus working directory.
    mybot_path = './'
    os.chdir(mybot_path)

    mybot = aiml.Kernel()
    if os.path.isfile("bot_brain.brn"):
        mybot.bootstrap(brainFile="bot_brain.brn")
    else:
        resource_dir = (os.path.split(os.path.realpath(__file__))[0] +
                        "/resources/")
        for aiml_file in ("std-startup.xml",
                          "tuling.xml",
                          "bye.aiml",
                          "tools.aiml",
                          "bad.aiml",
                          "funny.aiml",
                          "OrdinaryQuestion.aiml",
                          "Common conversation.aiml"):
            mybot.learn(resource_dir + aiml_file)
        mybot.saveBrain("bot_brain.brn")

    print('''
    Eric:你好,我是问答机器人。╭(╯^╰)╮
       ''')

    input_message = question

    if len(input_message) > 60:
        return mybot.respond("句子长度过长")
    if input_message.strip() == '':
        return mybot.respond("无话可说")

    # Word segmentation (the original note says punctuation is removed).
    message = T.wordSegment(input_message)
    if message == 'q':
        exit()

    # Look for an answer in the AIML data set.
    response = mybot.respond(message)

    print("=======")
    # The empty reply is handled before indexing into it.
    if response == "":
        ans = mybot.respond('找不到答案')
        print('Eric:' + ans)
        return '回答:' + ans
    if response[0] != '#':
        # A template matched: the bot's text is the answer.
        return response
    print(response + 'mark')
    print("=======")

    # A '#' reply is a command; an unrecognised command yields no candidates.
    ans = []
    if "searchbaike" in response:
        # Encyclopedia lookup: "#searchbaike:<entity>:<attribute>".
        print("searchbaike")
        print(response)
        res = response.split(':')
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        print(entity + '<---->' + attr)

        ans = baike.query(entity, attr)
        if isinstance(ans, list):
            # The encyclopedia produced an answer.
            return '回答:' + QAT.ptranswer(ans, False)
        elif u'::找不到' in ans.decode('utf-8'):
            # Fall back to the Baidu + Bing summary search.
            print("通用搜索")
            ans = search_summary.kwquery(input_message)
    elif "NoMatchingTemplate" in response:
        # No template matched: general search.
        print("NoMatchingTemplate")
        ans = search_summary.kwquery(input_message)

    if len(ans) == 0:
        ans = mybot.respond('找不到答案')
        return '回答:' + ans
    elif len(ans) > 1:
        print("不确定候选答案")
        return ans
    else:
        return '回答:' + ans[0].encode("utf-8")
Ejemplo n.º 11
0
    def handle(self):
        """Serve one question received on the connection and send one reply.

        A greeting is sent first, then up to 4096 bytes are read as the
        question. The reply comes from the AIML bot, the encyclopedia lookup
        or the summary search, and is written back with ``sendall``.
        """
        conn = self.request
        conn.sendall('欢迎访问智能百科问答系统')
        input_message = conn.recv(4096)

        print("input_message=====")
        print(input_message)
        print("==========")

        # The bot is asked once per reply so the printed text and the text
        # sent to the client are the same.
        if len(input_message) > 60:
            reply = mybot.respond("句子长度过长")
            print(reply)
            conn.sendall(reply)
            return
        if input_message.strip() == '无':
            reply = mybot.respond("无")
            print(reply)
            conn.sendall(reply)
            return

        print(input_message)
        # Word segmentation (the original note says punctuation is removed).
        message = T.wordSegment(input_message)
        print('word Seg:' + message)
        print('词性:')

        # NOTE(review): a segmented message of 'q' calls exit() inside the
        # request handler -- confirm this is intended.
        if message == 'q':
            exit()

        response = mybot.respond(message)

        print("=======")
        print(response)
        print("=======")

        if response == "":
            reply = mybot.respond('找不到答案')
            print('Eric:' + reply)
        elif response[0] != '#':
            # A template matched: the bot's text is the reply.
            print('Eric:' + response)
            reply = response
        else:
            # A '#' reply is a command; an unrecognised command yields no
            # candidates and therefore the "answer not found" reply.
            reply = None
            ans = []
            if "searchbaike" in response:
                # Encyclopedia lookup: "#searchbaike:<entity>:<attribute>".
                print("searchbaike")
                print(response)
                res = response.split(':')
                entity = str(res[1]).replace(" ", "")
                attr = str(res[2]).replace(" ", "")
                print(entity + '<---->' + attr)

                ans = baike.query(entity, attr)
                if isinstance(ans, list):
                    # The encyclopedia produced an answer.
                    reply = QAT.ptranswer(ans, False)
                    print('Eric:' + reply)
                elif u'::找不到' in ans.decode('utf-8'):
                    # Fall back to the Baidu + Bing summary search.
                    print("通用搜索")
                    ans = search_summary.kwquery(input_message)
            elif "NoMatchingTemplate" in response:
                # No template matched: general search.
                print("NoMatchingTemplate")
                ans = search_summary.kwquery(input_message)

            if reply is None:
                if len(ans) == 0:
                    reply = mybot.respond('找不到答案')
                    print('Eric:' + reply)
                elif len(ans) > 1:
                    print("不确定候选答案")
                    print('Eric: ')
                    lines = []
                    for a in ans:
                        print(a.encode("utf8"))
                        lines.append(a.encode("utf8") + '\n')
                    reply = ''.join(lines)
                else:
                    print('Eric:' + ans[0].encode("utf8"))
                    reply = ans[0].encode("utf8")

        conn.sendall(reply)
Ejemplo n.º 12
0
def qa(question):
    """Answer one question and print the result to standard output.

    A fresh AIML kernel is built from the resources next to this module on
    every call. Over-long or blank input is rejected with the bot's canned
    reply and nothing else is processed.

    Args:
        question: The user's question.

    Returns:
        None; every answer is printed.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    mybot = aiml.Kernel()
    resource_dir = os.path.split(os.path.realpath(__file__))[0] + "/resources/"
    for aiml_file in ("std-startup.xml",
                      "bye.aiml",
                      "tools.aiml",
                      "bad.aiml",
                      "funny.aiml",
                      "OrdinaryQuestion.aiml",
                      "Common conversation.aiml"):
        mybot.learn(resource_dir + aiml_file)

    # Raw string: the banner's backslashes are literal characters.
    print(r'''
.----------------.  .-----------------. .----------------.  .----------------.  .----------------.
| .--------------. || .--------------. || .--------------. || .--------------. || .--------------. |
| |    _______   | || | ____  _____  | || |      __      | || |  ___  ____   | || |  _________   | |
| |   /  ___  |  | || ||_   \|_   _| | || |     /  \     | || | |_  ||_  _|  | || | |_   ___  |  | |
| |  |  (__ \_|  | || |  |   \ | |   | || |    / /\ \    | || |   | |_/ /    | || |   | |_  \_|  | |
| |   '.___`-.   | || |  | |\ \| |   | || |   / /__\ \   | || |   |  __'.    | || |   |  _|  _   | |
| |  |`\____) |  | || | _| |_\   |_  | || | _/ /    \ \_ | || |  _| |  \ \_  | || |  _| |___/ |  | |
| |  |_______.'  | || ||_____|\____| | || ||____|  |____|| || | |____||____| | || | |_________|  | |
| |              | || |              | || |              | || |              | || |              | |
| '--------------' || '--------------' || '--------------' || '--------------' || '--------------' |
 '----------------'  '----------------'  '----------------'  '----------------'  '----------------'
 Eric:你好,我是Eric。╭(╯^╰)╮
    ''')

    input_message = question

    # Rejected input is reported and not processed any further.
    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
        return
    if input_message.strip() == '':
        print(mybot.respond("无"))
        return

    print(input_message)
    # Word segmentation (the original note says punctuation is removed).
    message = T.wordSegment(input_message)
    print('word Seg:' + message)
    print('词性:')

    if message == 'q':
        exit()

    response = mybot.respond(message)

    print("=======")
    print(response)
    print("=======")

    if response == "":
        ans = mybot.respond('找不到答案')
        print('Eric:' + ans)
        return
    if response[0] != '#':
        # A template matched: the bot's text is the answer.
        print('Eric:' + response)
        return

    # A '#' reply is a command; an unrecognised command yields no candidates.
    ans = []
    if "searchbaike" in response:
        # Encyclopedia lookup: "#searchbaike:<entity>:<attribute>".
        print("searchbaike")
        print(response)
        res = response.split(':')
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        print(entity + '<---->' + attr)

        ans = baike.query(entity, attr)
        if isinstance(ans, list):
            # The encyclopedia produced an answer; print it exactly once.
            print('Eric:' + QAT.ptranswer(ans, False))
            return
        elif u'::找不到' in ans.decode('utf-8'):
            # Fall back to the Baidu + Bing summary search.
            print("通用搜索")
            ans = search_summary.kwquery(input_message)
    elif "NoMatchingTemplate" in response:
        # No template matched: general search.
        print("NoMatchingTemplate")
        ans = search_summary.kwquery(input_message)

    if len(ans) == 0:
        ans = mybot.respond('找不到答案')
        print('Eric:' + ans)
    elif len(ans) > 1:
        print("不确定候选答案")
        print('Eric: ')
        for a in ans:
            print(a.encode("utf8"))
    else:
        print('Eric:' + ans[0].encode("utf8"))
Ejemplo n.º 13
0
def answer(question):
    """Answer one question using the module-level bot and return the result.

    Args:
        question: The user's question.

    Returns:
        A list of answer strings: one element for a template match, an
        encyclopedia answer or a single search result, several elements when
        the search returns multiple candidates.

    Raises:
        Exception: ``"Too Long"`` for input over 600 characters,
            ``"No Input"`` for blank input, ``"No Answer"`` when neither the
            bot nor the search produces anything.
    """
    if len(question) > 600:
        print(mybot.respond("句子长度过长"))
        raise Exception("Too Long")
    elif question.strip() == '':
        print(mybot.respond("无"))
        raise Exception("No Input")

    print(question)
    # Word segmentation (the original note says punctuation is removed).
    message = T.wordSegment(question)
    print('word Seg:' + message)
    print('词性:')

    if message == 'q':
        exit()

    response = mybot.respond(message)
    print(response)

    if response == "":
        raise Exception("No Answer")
    if response[0] != '#':
        # A template matched: the bot's text is the answer.
        print('Eric:' + response)
        return [response]

    # A '#' reply is a command; an unrecognised command yields no candidates
    # and therefore ends in the "No Answer" exception below.
    ans = []
    if "searchbaike" in response:
        # Encyclopedia lookup: "#searchbaike:<entity>:<attribute>".
        print("searchbaike")
        print(response)
        res = response.split(':')
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        print(entity + '<---->' + attr)

        ans = baike.query(entity, attr)
        if isinstance(ans, list):
            # The encyclopedia produced an answer.
            formatted = QAT.ptranswer(ans, False)
            print('Eric:' + formatted)
            return [formatted]
        elif u'::找不到' in ans.decode('utf-8'):
            # Fall back to the Baidu + Bing summary search.
            print("通用搜索")
            ans = search_summary.kwquery(question)
    elif "NoMatchingTemplate" in response:
        # No template matched: general search.
        print("NoMatchingTemplate")
        ans = search_summary.kwquery(question)

    if len(ans) == 0:
        raise Exception("No Answer")
    elif len(ans) > 1:
        print("不确定候选答案")
        print('Eric: ')
        encoded = [a.encode("utf8") for a in ans]
        for line in encoded:
            print(line)
        return encoded
    else:
        print('Eric:' + ans[0].encode("utf8"))
        return [ans[0].encode("utf8")]
Ejemplo n.º 14
0
def _kw_split_sentences(text):
    """Split *text* into non-empty fragments at sentence punctuation."""
    # Half-width and full-width delimiters; the full-width forms are written
    # as escapes so they survive Unicode normalisation of this source file.
    delimiters = frozenset([
        u"。", u"?", u".", u"_", u"-", u":", u"!",
        u"\uff1f", u"\uff1a", u"\uff01",
    ])
    sentences = []
    current = []
    for ch in text:
        if ch in delimiters:
            if current:
                sentences.append(''.join(current))
                current = []
        else:
            current.append(ch)
    # Keep the trailing fragment when the text does not end in a delimiter.
    if current:
        sentences.append(''.join(current))
    return sentences


def _kw_rank_person_names(text, keywords):
    """Return up to 3 most frequent person names near *keywords* in *text*."""
    # Keep only sentences mentioning a keyword, de-duplicated in first-seen
    # order.
    seen = set()
    key_sentences = []
    for sentence in _kw_split_sentences(text):
        if sentence in seen:
            continue
        if any(k in sentence for k in keywords):
            seen.add(sentence)
            key_sentences.append(sentence)

    # Count words tagged "nr" (person name) across those sentences.
    counts = {}
    for sentence in key_sentences:
        for w in T.postag(sentence):
            if w.flag == "nr":
                counts[w.word] = counts.get(w.word, 0) + 1

    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    # A name that is itself a query keyword cannot be the answer.
    candidates = [word for word, _ in ranked if word not in keywords]
    return candidates[:3]


def kwquery(query):
    """Answer *query* from Baidu search results.

    Sources are tried in this order and the first hit wins:

    1. the exact-answer box (``op_exactqa_s_answer``) of the first result;
    2. ``kgquery_entity`` on the first two noun keywords of the query;
    3. the ``lemma-summary`` of a Baidu Baike page linked from result 1 or 2;
    4. otherwise, the most frequent person names found in result sentences
       that mention a keyword (at most three).

    Args:
        query: The question text.

    Returns:
        A list of answer strings; empty when nothing was found or the
        search page could not be fetched.
    """
    # Keywords are the words whose POS flag contains "n" (nouns).
    keywords = [w.word for w in T.postag(query) if "n" in w.flag]

    soup_baidu = To.get_html_baidu('https://www.baidu.com/s?wd=' +
                                   quote(query))
    if soup_baidu is None:
        return []

    snippets = []
    kg_checked = False
    # Scan the result containers with ids 1..9.
    for i in range(1, 10):
        results = soup_baidu.find(id=i)
        if results is None:
            break

        # Exact-answer box, only looked for on the first result.
        if i == 1 and results.attrs.get('mu'):
            exact = results.find(class_='op_exactqa_s_answer')
            if exact is not None:
                return [exact.get_text().strip()]

        # The lookup does not depend on the loop variable, so run it at most
        # once instead of on every iteration.
        if not kg_checked and len(keywords) > 1:
            kg_checked = True
            kg_answers = list(kgquery_entity(keywords[0], keywords[1]))
            if kg_answers:
                return kg_answers

        # Baike summary, only trusted for the first two results.
        heading = results.find("h3")
        link = heading.find("a") if heading is not None else None
        if (link is not None and i in (1, 2)
                and u"百度百科" in link.get_text()):
            url = link.get('href')
            if url is None:
                continue
            baike_soup = To.get_html_baike(url)
            # Presumably returns None on fetch failure, like get_html_baidu.
            if baike_soup is None:
                continue
            summary = baike_soup.find(class_='lemma-summary')
            if summary is None:
                continue
            return [summary.get_text().replace("\n", "").strip()]

        snippets.append(results.get_text().strip())

    # No direct answer: mine the collected snippets for person names.
    return _kw_rank_person_names(''.join(snippets), keywords)