Python TextProcess.wordSegment 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: QA.Tools

클래스/타입: TextProcess

메소드/함수: wordSegment

hotexamples.com에서의 예제들: 11

Python TextProcess.wordSegment - 11개의 예제를 찾았습니다. 오픈소스 프로젝트에서 추출한, Python QA.Tools.TextProcess.wordSegment의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가하여 예제 품질 향상에 도움을 주실 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

postag(11)

wordSegment(9)

jieba_initialize(5)

load_baikeattr_name(2)

load_synonyms_word_inattr(2)

예제 #1

파일 보기

def run(query):
    """Answer a single query and return the reply.

    The query is segmented with ``T.wordSegment`` and matched against the AIML
    files in the ``resources`` directory next to this file.  A reply starting
    with ``#`` is treated as a command: ``searchbaike`` commands are forwarded
    to ``baike.query``; when that lookup reports "找不到" the raw query is sent
    to ``search_summary.kwquery`` instead.  A non-empty reply that does not
    start with ``#`` also goes straight to ``search_summary.kwquery``.

    Args:
        query: the raw user question.

    Returns:
        The answer produced by one of the back ends, or a fixed fallback
        message when the query is too long or nothing usable was found.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Switch to the working directory that holds the corpus.
    mybot_path = './'
    os.chdir(mybot_path)

    # Resolve the resource directory once instead of once per file.
    resource_dir = os.path.split(os.path.realpath(__file__))[0] + "/resources/"
    mybot = aiml.Kernel()
    for filename in ("std-startup.xml",
                     "bye.aiml",
                     "tools.aiml",
                     "bad.aiml",
                     "funny.aiml",
                     "OrdinaryQuestion.aiml",
                     "Common conversation.aiml"):
        mybot.learn(resource_dir + filename)

    def summary_answer():
        """Fall back to the summary search; only a single candidate is trusted."""
        candidates = search_summary.kwquery(query)
        if len(candidates) == 0:
            return mybot.respond('找不到答案')
        if len(candidates) == 1:
            return candidates[0].strip().replace(' ', '').replace("\n", "")
        return '找不到答案'

    if len(query) > 60:
        return '句子长度过长'
    if query.strip() == '':
        return mybot.respond('无')

    message = T.wordSegment(query)
    # NOTE(review): the tagging result was never used; the call is kept in
    # case postag has side effects -- confirm and drop.
    T.postag(query)

    response = mybot.respond(message)
    if response == '':
        return mybot.respond('找不到答案')
    if response[0] != '#':
        return summary_answer()
    if "searchbaike" not in response:
        return '找不到答案'

    # Command layout is "<command>:<entity>:<attribute>".
    res = response.split(':')
    if len(res) < 3:
        # Malformed command: report "no answer" instead of raising IndexError.
        return '找不到答案'
    entity = str(res[1]).replace(" ", "")
    attr = str(res[2]).replace(" ", "")

    ans = baike.query(entity, attr)
    if '找不到' not in ans:
        return ans
    return summary_answer()

예제 #2

파일 보기

def qa():

    #初始化jb分词器
    T.jieba_initialize()

    #切换到语料库所在工作目录
    mybot_path = './'
    # os.chdir(mybot_path)

    mybot = aiml.Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/Common conversation.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/OrdinaryQuestion.aiml")
    # mybot.respond('Load Doc Snake')
    #载入百科属性列表

    print '''
.----------------.  .-----------------. .----------------.  .----------------.  .----------------.
| .--------------. || .--------------. || .--------------. || .--------------. || .--------------. |
| |    _______   | || | ____  _____  | || |      __      | || |  ___  ____   | || |  _________   | |
| |   /  ___  |  | || ||_   \|_   _| | || |     /  \     | || | |_  ||_  _|  | || | |_   ___  |  | |
| |  |  (__ \_|  | || |  |   \ | |   | || |    / /\ \    | || |   | |_/ /    | || |   | |_  \_|  | |
| |   '.___`-.   | || |  | |\ \| |   | || |   / /__\ \   | || |   |  __'.    | || |   |  _|  _   | |
| |  |`\____) |  | || | _| |_\   |_  | || | _/ /    \ \_ | || |  _| |  \ \_  | || |  _| |___/ |  | |
| |  |_______.'  | || ||_____|\____| | || ||____|  |____|| || | |____||____| | || | |_________|  | |
| |              | || |              | || |              | || |              | || |              | |
| '--------------' || '--------------' || '--------------' || '--------------' || '--------------' |
 '----------------'  '----------------'  '----------------'  '----------------'  '----------------'
 Eric：你好，我是Eric。╭(╯^╰)╮
    '''

    while True:
        input_message = raw_input("Enter your message >> ")

        if len(input_message) > 60:
            print mybot.respond("句子长度过长")
            continue
        elif input_message.strip() == '':
            print mybot.respond("无")
            continue

        print input_message
        message = T.wordSegment(input_message)
        # 去标点
        print 'word Seg:' + message
        print '词性：'
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            print "======="
            print response
            print "======="

            if response == "":
                ans = mybot.respond('找不到答案')
                print 'Eric：' + ans
            # 百科搜索
            elif response[0] == '#':
                # 匹配百科
                if response.__contains__("searchbaike"):
                    print "searchbaike"
                    print response
                    res = response.split(':')
                    #实体
                    entity = str(res[1]).replace(" ", "")
                    #属性
                    attr = str(res[2]).replace(" ", "")
                    print entity + '<---->' + attr

                    ans = baike.query(entity, attr)
                    # 如果命中答案
                    if type(ans) == list:
                        print 'Eric：' + QAT.ptranswer(ans, False)
                        continue
                    elif ans.decode('utf-8').__contains__(u'::找不到'):
                        #百度摘要+Bing摘要
                        print "通用搜索"
                        ans = search_summary.kwquery(input_message)

                # 匹配不到模版，通用查询
                elif response.__contains__("NoMatchingTemplate"):
                    print "NoMatchingTemplate"
                    ans = search_summary.kwquery(input_message)

                if len(ans) == 0:
                    ans = mybot.respond('找不到答案')
                    print 'Eric：' + ans
                elif len(ans) > 1:
                    print "不确定候选答案"
                    print 'Eric: '
                    for a in ans:
                        print a.encode("utf8")
                else:
                    print 'Eric：' + ans[0].encode("utf8")

            # 匹配模版
            else:
                print 'Eric：' + response

예제 #3

파일 보기

def QA(input_message, mybot):
    findAns = False
    reply = ''
    ansdict = {}
    dbname = 'zwgx'  #数据库名
    dbip = 'localhost'  #数据库IPlocalhost
    dbport = 3306  #数据库端口
    dbusername = '******'  #数据库用户名
    dbpassword = '******'  #数据库密码root
    schoolname = ''
    intention = ''
    if len(input_message) > 60:
        reply = mybot.respond("句子长度过长")
        findAns = True
    elif input_message.strip() == '无':
        reply = mybot.respond("无")
        findAns = True

    if (findAns == False):
        #print input_message
        #传入一个b''未加工的对象
        message = T.wordSegment(input_message)
        # 分词去标点
        if message == 'q':
            exit()
        else:
            print 'word Seg:' + message
            print utf_to_bgk('词性：')
            words = T.postag(input_message)  #词性标注 格式 词/词性
            for w in words:
                print w.word, w.flag
                if w.flag == 'school':
                    try:
                        #先用自定义的分词处理得到对应的词性，然后根据词性到数据库查询
                        db = pymysql.connect(host=dbip,
                                             user=dbusername,
                                             passwd=dbpassword,
                                             db=dbname,
                                             charset="utf8")
                        cursor = db.cursor()
                        sql = u"SELECT `学校名` FROM 学校简称 WHERE `简称`='" + w.word + "'"
                        # 执行SQL语句
                        cursor.execute(sql)
                        # 获取所有记录列表,已验证数据库查询跑通
                        results = cursor.fetchall()
                        #print results
                        #替换简称
                        if len(results) > 0:
                            input_message = input_message.replace(
                                w.word, results[0][0]).__str__()
                            w.flag = 'nt'
                            w.word = results[0][0]
                        print utf_to_bgk(input_message), utf_to_bgk(
                            w.word), utf_to_bgk(w.flag)
                        # 关闭数据库连接
                        db.close()
                    except Exception as e:
                        print(e)
                # 识别学校简称并配对数据库中已存内容
                if w.flag == 'x' or w.flag == 'nt':
                    try:
                        db = pymysql.connect(host=dbip,
                                             user=dbusername,
                                             passwd=dbpassword,
                                             db=dbname,
                                             charset="utf8")
                        cursor = db.cursor()
                        sql = u"SELECT `属性`,`内容` FROM school WHERE `学校`='" + w.word + "'"
                        # 执行SQL语句
                        cursor.execute(sql)

                        # 获取所有记录列表
                        results = cursor.fetchall()
                        #print u'flag转化之后的查询',results
                        if len(results) > 0:
                            for row in results:
                                ansdict[row[0]] = row[1]
                                #print row[0],row[1]
                                # reply +=row[0].encode("utf8")
                                # reply+=" ".encode("utf8")
                            # shuxing=raw_input('Frank：你想了解什么属性 ' + reply+">>")
                            # sql = u"SELECT `内容` FROM school WHERE `学校`='" + w.word + u"'AND `属性`='"+shuxing+"'"
                            # cursor.execute(sql)
                            # results = cursor.fetchall()
                            # if len(results)>0:
                            #     print "Frank： "+results[0][0].encode("utf8")
                            #     reply=results[0][0].encode("utf8")
                            #     return reply
                        # 关闭数据库连接
                        #print u'查询之后的结果储存',ansdict 将数据库中的所有信息写入到ansdict中
                        db.close()
                    except Exception as e:
                        print(e)
                #todo： 每个词去找查数据库可以优化一下 加一下词性判断
                #获得学校的名称
                if FindSchool(dbip, dbusername, dbpassword, dbname,
                              w.word) != "":
                    schoolname = FindSchool(dbip, dbusername, dbpassword,
                                            dbname, w.word)

            uni = input_message.strip().decode('utf-8')
            print u'查看返回值', uni, utf_to_bgk(input_message.strip())
            response = mybot.respond(
                input_message.strip())  #如果未给传入参数转化为utf8则报错

            print "======="
            #print response
            print "=======+"

            if response == "":
                reply = mybot.respond('找不到答案')
                findAns = True
                print 'Frank1：' + utf_to_bgk(reply)


# *********************************************************************************
# 百科搜索  aiml机器人没有没有
            elif response[0] == '#':
                # 匹配百科
                # if response.__contains__("searchbaike"):
                #     print "searchbaike"
                #     print response
                #     res = response.split(':')
                #     # 实体
                #     entity = str(res[1]).replace(" ", "")
                #     # 属性
                #     attr = str(res[2]).replace(" ", "")
                #     print entity + '<---->' + attr
                #
                #     ans = baike.query(entity, attr)
                #     # 如果命中答案
                #     if type(ans) == list:
                #         print 'Frank：' + QAT.ptranswer(ans, False)
                #         reply = QAT.ptranswer(ans, False)
                #         findAns = True
                #     elif ans.decode('utf-8').__contains__(u'::找不到'):
                #         # 百度摘要+Bing摘要
                #         print "通用搜索"
                #         ans = search_summary.kwquery(input_message)
                #
                # # 匹配不到模版，通用查询
                # elif response.__contains__("NoMatchingTemplate"):
                #     print "NoMatchingTemplate"
                #
                #当复杂问题时，通过分类器模型进行分类再查询
                if (schoolname != ""):
                    sock = socket(AF_INET, SOCK_STREAM)
                    sock.connect(('127.0.0.1', 50009))
                    sock.sendall(input_message.encode("utf-8"))
                    intention = sock.recv(1024)
                    sock.close()
                    print utf_to_bgk(intention), u'经过分类器处理后的结果'

                #经过dl识别后分类问题，如果问题在数据库中，即把问题分类为数据库的一个属性，再调用属性值，可以增加数据库的属性分类和值
                if unicode(intention) in ansdict:
                    reply = ansdict[unicode(intention)]
                    #print 'Frank：' + reply.encode("utf8")
                    #print 'Frank2：' + utf_to_bgk(reply)

                #如果问题没有在数据库预存储
                else:
                    TempDict = search_summary.kwquery(input_message, intention,
                                                      schoolname)
                    ansdict['schoolname'] = TempDict['schoolname']
                    ansdict['intention'] = TempDict['intention']
                    ansdict['index'] = TempDict['index']
                    ans = TempDict['answer']
                    if (findAns == False):
                        if len(ans) == 0:
                            ans = mybot.respond('找不到答案')
                            #print 'Frank3：' + utf_to_bgk(ans)
                            reply = ans
                            findAns = True
                        elif len(ans) > 1:
                            print u"不确定候选答案"
                            print 'Frank4: '
                            for a in ans:
                                print a.encode("utf8")
                                reply += a.encode("utf8") + '\n'
                            findAns = True
                        else:
                            #print 'Frank5：' + ans[0].encode("utf8")
                            reply = ans[0].encode("utf8")
                            findAns = True

            # 匹配模版
            else:
                print 'Frank6：' + utf_to_bgk(response)
                reply = response
                findAns = True

    ansdict['baidu'] = reply
    json_s = json.dumps(ansdict)
    return json_s

예제 #4

파일 보기

파일: MainProgram.py 프로젝트: yanzhen74/QA-Snake

 '----------------'  '----------------'  '----------------'  '----------------'  '----------------'
 Eric：你好，我是Eric。╭(╯^╰)╮
    '''

    while True:
        input_message = raw_input("Enter your message >> ")

        if len(input_message) > 60:
            print mybot.respond("句子长度过长")
            continue
        elif input_message.strip() == '':
            print mybot.respond("无")
            continue

        print input_message
        message = T.wordSegment(input_message)
        # 去标点
        print 'word Seg:' + message
        print '词性：'
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            print "======="
            print response
            print "======="

            if response == "":

예제 #5

파일 보기

파일: app.py 프로젝트: iiclear/bug_fix0

def ws():
    user_socket = request.environ.get('wsgi.websocket')  # type:WebSocket
    while 1:
        msg =user_socket.receive()
        question = json.loads(msg)
        q = question['data']['mine']['content']
        msg =q
        input_message = str(msg).encode('utf-8')
        if len(input_message) > 60:
            answer =  mybot.respond("句子长度过长")
            # continue
        elif input_message.strip() == '':
            answer = mybot.respond("无话可说")
            # continue

        # print input_message
        message = T.wordSegment(input_message)
        # 去标点
        # print 'word Seg:'+ message
        # print '词性：'
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)  # 在AIML数据集里寻找答案

            print "======="
            if response[0] == '#':
                print response + 'mark'

            else:
               answer =  response

            print "======="

            if response == "":
                ans = mybot.respond('找不到答案')
                answer = ans
            # 百科搜索
            elif response[0] == '#' or len(response) < 1:
                # 匹配百科
                if response.__contains__("searchbaike"):
                    print "searchbaike"
                    print response
                    res = response.split(':')
                    # 实体
                    entity = str(res[1]).replace(" ", "")
                    # 属性
                    attr = str(res[2]).replace(" ", "")
                    print entity + '<---->' + attr

                    ans = baike.query(entity, attr)

                    # 如果命中答案
                    print type(ans)
                    if type(ans) == list:
                        answer = '回答：' + QAT.ptranswer(ans, False)
                        # continue
                    elif ans.decode('utf-8').__contains__(u'::找不到'):
                        # 百度摘要+Bing摘要
                        print "通用搜索"
                        answer = search_summary.kwquery(input_message)

                # 匹配不到模版，通用查询
                elif response.__contains__("NoMatchingTemplate"):
                    print "NoMatchingTemplate"
                    ans = search_summary.kwquery(input_message)
                    print type(ans)

                if len(ans) == 0:
                    ans = mybot.respond('找不到答案')
                    answer = '回答：' + ans
                elif len(ans) > 1:
                    print "不确定候选答案"
                    answer = ans[0]
                    print 'Eric: '
                    for a in ans:
                        print a.encode("utf-8")
                else:
                    answer = '回答：' + ans[0].encode("utf-8")



            # 匹配模版
            else:
                answer = '回答：' + response

        s = '展开全部'
        print type(answer).__name__

        if (type(answer).__name__ == 'list' and len(answer)>0) or '唔... 怎么回答...'in answer or '天气' in msg:
            answer = geta(msg)
            answer = answer
            print 'wocao'
        else:
            #print answer
            if s in str(answer):
                print answer
                answer = str(answer).replace('\n', '').replace('展开全部', "").split('已赞过')[0]
                print  'OS' +answer
        print user_socket,msg

        res =answer
        a = {
            "username": "******",
            "avatar": "https://robot.rszhang.top/images/icon/nv/0.jpg",
            "id": "-2",  # //消息的来源ID（如果是私聊，则是用户id，如果是群聊，则是群组id）
            "type": "friend",  # //聊天窗口来源类型，从发送消息传递的to里面获取
            "content": res,  # //消息内容
            "cid": 0,  # //消息id，可不传。除非你要对消息进行一些操作（如撤回）
            "mine": True,  # //是否我发送的消息，如果为true，则会显示在右方
            "fromid": "100000",  # /消息的发送者id（比如群组中的某个消息发送者），可用于自动解决浏览器多窗口时的一些问题
            "timestamp": 1467475443306,  # //服务端时间戳毫秒数。注意：如果你返回的是标准的 unix 时间戳，记得要 *1000
        }

        user_socket.send(json.dumps(a))

예제 #6

파일 보기

파일: server.py 프로젝트: csc285650409/ZWGX

def QA(input_message, mybot):
    findAns = False
    reply = ''
    ansdict = {}
    dbname = 'zwgx'  #数据库名
    dbip = '106.14.124.221'  #数据库IPlocalhost
    dbport = 3306  #数据库端口
    dbusername = '******'  #数据库用户名
    dbpassword = '******'  #数据库密码root
    schoolname = ''
    intention = ''
    if len(input_message) > 60:
        reply = mybot.respond("句子长度过长")
        findAns = True
    elif input_message.strip() == '无':
        reply = mybot.respond("无")
        findAns = True

    if (findAns == False):
        # print input_message
        message = T.wordSegment(input_message)
        # 分词去标点
        if message == 'q':
            exit()
        else:
            print 'word Seg:' + message
            print '词性：'
            words = T.postag(input_message)
            for w in words:
                print w.word, w.flag
                if w.flag == 'school':
                    try:
                        db = pymysql.connect(host=dbip,
                                             user=dbusername,
                                             passwd=dbpassword,
                                             db=dbname,
                                             charset="utf8")
                        cursor = db.cursor()
                        sql = u"SELECT `学校名` FROM 学校简称 WHERE `简称`='" + w.word + "'"
                        # 执行SQL语句
                        cursor.execute(sql)
                        # 获取所有记录列表
                        results = cursor.fetchall()
                        #替换简称
                        if len(results) > 0:
                            input_message = input_message.replace(
                                w.word, results[0][0]).__str__()
                            w.flag = 'nt'
                            w.word = results[0][0]
                        # 关闭数据库连接
                        db.close()
                    except Exception as e:
                        print(e)
                # 识别学校简称并配对数据库中已存内容
                if w.flag == 'x' or w.flag == 'nt':
                    try:
                        db = pymysql.connect(host=dbip,
                                             user=dbusername,
                                             passwd=dbpassword,
                                             db=dbname,
                                             charset="utf8")
                        cursor = db.cursor()
                        sql = u"SELECT `属性`,`内容` FROM school WHERE `学校`='" + w.word + "'"
                        # 执行SQL语句
                        cursor.execute(sql)
                        # 获取所有记录列表
                        results = cursor.fetchall()
                        if len(results) > 0:
                            for row in results:
                                ansdict[row[0]] = row[1]
                                #print row[0],row[1]
                                # reply +=row[0].encode("utf8")
                                # reply+=" ".encode("utf8")
                            # shuxing=raw_input('Frank：你想了解什么属性 ' + reply+">>")
                            # sql = u"SELECT `内容` FROM school WHERE `学校`='" + w.word + u"'AND `属性`='"+shuxing+"'"
                            # cursor.execute(sql)
                            # results = cursor.fetchall()
                            # if len(results)>0:
                            #     print "Frank： "+results[0][0].encode("utf8")
                            #     reply=results[0][0].encode("utf8")
                            #     return reply
                        # 关闭数据库连接
                        db.close()
                    except Exception as e:
                        print(e)
                #todo： 每个词去找查数据库可以优化一下 加一下词性判断
                if FindSchool(dbip, dbusername, dbpassword, dbname,
                              w.word) != "":
                    schoolname = FindSchool(dbip, dbusername, dbpassword,
                                            dbname, w.word)

            response = mybot.respond(input_message.strip())

            print "======="
            print response
            print "======="

            if response == "":
                reply = mybot.respond('找不到答案')
                findAns = True
                print 'Frank：' + reply


# *********************************************************************************
# 百科搜索
            elif response[0] == '#':
                # 匹配百科
                # if response.__contains__("searchbaike"):
                #     print "searchbaike"
                #     print response
                #     res = response.split(':')
                #     # 实体
                #     entity = str(res[1]).replace(" ", "")
                #     # 属性
                #     attr = str(res[2]).replace(" ", "")
                #     print entity + '<---->' + attr
                #
                #     ans = baike.query(entity, attr)
                #     # 如果命中答案
                #     if type(ans) == list:
                #         print 'Frank：' + QAT.ptranswer(ans, False)
                #         reply = QAT.ptranswer(ans, False)
                #         findAns = True
                #     elif ans.decode('utf-8').__contains__(u'::找不到'):
                #         # 百度摘要+Bing摘要
                #         print "通用搜索"
                #         ans = search_summary.kwquery(input_message)
                #
                # # 匹配不到模版，通用查询
                # elif response.__contains__("NoMatchingTemplate"):
                #     print "NoMatchingTemplate"

                if (schoolname != ""):
                    sock = socket(AF_INET, SOCK_STREAM)
                    sock.connect(('127.0.0.1', 50009))
                    sock.sendall(input_message.encode("utf-8"))
                    intention = sock.recv(1024)
                    sock.close()
                    print intention

                if unicode(intention) in ansdict:
                    reply = ansdict[unicode(intention)]
                    print 'Frank：' + reply.encode("utf8")
                else:
                    TempDict = search_summary.kwquery(input_message, intention,
                                                      schoolname)
                    ansdict['schoolname'] = TempDict['schoolname']
                    ansdict['intention'] = TempDict['intention']
                    ansdict['index'] = TempDict['index']
                    ans = TempDict['answer']
                    #*********************************************************************************
                    if (findAns == False):
                        if len(ans) == 0:
                            ans = mybot.respond('找不到答案')
                            print 'Frank：' + ans
                            reply = ans
                            findAns = True
                        elif len(ans) > 1:
                            print "不确定候选答案"
                            print 'Frank: '
                            for a in ans:
                                print a.encode("utf8")
                                reply += a.encode("utf8") + '\n'
                            findAns = True
                        else:
                            print 'Frank：' + ans[0].encode("utf8")
                            reply = ans[0].encode("utf8")
                            findAns = True

            # 匹配模版
            else:
                print 'Frank：' + response
                reply = response
                findAns = True

    ansdict['baidu'] = reply
    json_s = json.dumps(ansdict)
    return json_s

예제 #7

파일 보기

파일: MainProgram.py 프로젝트: pettertang/QA-Snake

 '----------------'  '----------------'  '----------------'  '----------------'  '----------------'
 Eric：你好，我是Eric。╭(╯^╰)╮
    '''

    while True:
        input_message = raw_input("Enter your message >> ")

        if len(input_message) > 60:
            print mybot.respond("句子长度过长")
            continue
        elif input_message.strip() == '':
            print mybot.respond("无")
            continue

        print input_message
        message = T.wordSegment(input_message)
        # 去标点
        print 'word Seg:'+ message
        print '词性：'
        words = T.postag(input_message)


        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            print "======="
            print response
            print "======="

예제 #8

파일 보기

파일: MainProgram.py 프로젝트: iiclear/flask_app

def run(question):
    """Answer one question and return the reply.

    A new AIML kernel is created on every call: it is bootstrapped from
    ``bot_brain.brn`` when that file exists, otherwise the AIML resources next
    to this module are learned and the brain file is written for later calls.

    Return value, by case:
      * input longer than 60 characters or blank: the kernel's canned reply;
      * a plain template match: the raw AIML response (no prefix);
      * empty AIML response or no search result: ``'回答：'`` followed by the
        kernel's "not found" reply;
      * a Baike hit (list): ``'回答：'`` followed by ``QAT.ptranswer(...)``;
      * exactly one candidate: ``'回答：'`` followed by that candidate;
      * several candidates: the raw candidate collection itself, unformatted.

    Calls ``exit()`` when the segmented message equals ``'q'``.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Switch to the working directory that holds the corpus.
    mybot_path = './'
    os.chdir(mybot_path)

    mybot = aiml.Kernel()
    if os.path.isfile("bot_brain.brn"):
        mybot.bootstrap(brainFile="bot_brain.brn")
    else:
        resource_dir = os.path.split(os.path.realpath(__file__))[0] + "/resources/"
        # Load order is kept exactly as before.
        for aiml_file in (
            "std-startup.xml",
            "tuling.xml",
            "bye.aiml",
            "tools.aiml",
            "bad.aiml",
            "funny.aiml",
            "OrdinaryQuestion.aiml",
            "Common conversation.aiml",
        ):
            mybot.learn(resource_dir + aiml_file)
        mybot.saveBrain("bot_brain.brn")

    print('''
    Eric：你好，我是问答机器人。╭(╯^╰)╮
       ''')

    input_message = question

    if len(input_message) > 60:
        return mybot.respond("句子长度过长")
    if input_message.strip() == '':
        return mybot.respond("无话可说")

    message = T.wordSegment(input_message)
    # The POS-tagging result is not used here; the call is kept in case
    # postag has side effects that are not visible from this function.
    T.postag(input_message)

    if message == 'q':
        exit()

    # Look the segmented message up in the AIML data set.
    response = mybot.respond(message)

    print("=======")

    # Check for an empty response BEFORE indexing it: response[0] on an empty
    # string raised IndexError and made the "not found" reply unreachable.
    if response == "":
        ans = mybot.respond('找不到答案')
        print('Eric：' + ans)
        return '回答：' + ans

    # A response that does not start with '#' is a direct template match and
    # is returned unchanged, as before.
    if response[0] != '#':
        return response

    print(response + 'mark')
    print("=======")

    # Default to "no candidates" so that a '#' response carrying neither
    # marker yields the "not found" reply instead of UnboundLocalError.
    ans = []

    if "searchbaike" in response:
        # Encyclopedia (Baike) lookup: response carries entity and attribute
        # as its second and third ':'-separated fields.
        print("searchbaike")
        print(response)
        res = response.split(':')
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        print(entity + '<---->' + attr)

        ans = baike.query(entity, attr)

        if isinstance(ans, list):
            # Baike produced an answer.
            return '回答：' + QAT.ptranswer(ans, False)
        elif u'::找不到' in ans.decode('utf-8'):
            # Baike reported a miss: fall back to the generic summary search.
            print("通用搜索")
            ans = search_summary.kwquery(input_message)

    elif "NoMatchingTemplate" in response:
        # No template matched: go straight to the generic search.
        print("NoMatchingTemplate")
        ans = search_summary.kwquery(input_message)

    if len(ans) == 0:
        ans = mybot.respond('找不到答案')
        return '回答：' + ans
    elif len(ans) > 1:
        # Several candidates: hand the raw collection back to the caller.
        print("不确定候选答案")
        return ans
    else:
        return '回答：' + ans[0].encode("utf-8")

예제 #9

파일 보기

파일: server.py 프로젝트: iiclear/bug_fix0

    def handle(self):
        """Serve one request: greet, read one message, send back one reply.

        Sends a welcome string on ``self.request``, reads up to 4096 bytes
        once, computes a single reply and sends it. Every reply is produced
        exactly once and the same value is both logged and sent.

        Calls ``exit()`` when the segmented message equals ``'q'``.
        """
        conn = self.request
        conn.sendall('欢迎访问智能百科问答系统')
        input_message = conn.recv(4096)

        print("input_message=====")
        print(input_message)
        print("==========")

        if len(input_message) > 60:
            # Ask the kernel once so the logged and the sent reply agree.
            reply = mybot.respond("句子长度过长")
            print(reply)
            conn.sendall(reply)
            return

        # NOTE(review): this compares against the literal '无', not the empty
        # string — confirm that is what the client sends for blank input.
        if input_message.strip() == '无':
            reply = mybot.respond("无")
            print(reply)
            conn.sendall(reply)
            return

        print(input_message)
        message = T.wordSegment(input_message)
        print('word Seg:' + message)
        print('词性：')
        # The POS-tagging result is not used here; the call is kept in case
        # postag has side effects that are not visible from this method.
        T.postag(input_message)

        if message == 'q':
            exit()

        response = mybot.respond(message)

        print("=======")
        print(response)
        print("=======")

        if response == "":
            # The kernel had nothing to say: send its "not found" reply.
            reply = mybot.respond('找不到答案')
            print('Eric：' + reply)

        elif response[0] != '#':
            # Direct template match.
            reply = response
            print('Eric：' + reply)

        else:
            # '#'-prefixed responses request an external lookup. Default to
            # "no candidates" so a response with neither marker produces the
            # "not found" reply instead of UnboundLocalError.
            ans = []

            if "searchbaike" in response:
                print("searchbaike")
                print(response)
                res = response.split(':')
                # Entity and attribute are the 2nd and 3rd ':' fields.
                entity = str(res[1]).replace(" ", "")
                attr = str(res[2]).replace(" ", "")
                print(entity + '<---->' + attr)

                ans = baike.query(entity, attr)
                if isinstance(ans, list):
                    # Baike produced an answer: render it once and send it.
                    reply = QAT.ptranswer(ans, False)
                    print('Eric：' + reply)
                    conn.sendall(reply)
                    return
                elif u'::找不到' in ans.decode('utf-8'):
                    # Baike miss: fall back to the generic summary search.
                    print("通用搜索")
                    ans = search_summary.kwquery(input_message)

            elif "NoMatchingTemplate" in response:
                # No template matched: go straight to the generic search.
                print("NoMatchingTemplate")
                ans = search_summary.kwquery(input_message)

            if len(ans) == 0:
                reply = mybot.respond('找不到答案')
                print('Eric：' + reply)
            elif len(ans) > 1:
                # Several candidates: send them all, one per line.
                print("不确定候选答案")
                print('Eric: ')
                reply = ''
                for a in ans:
                    encoded = a.encode("utf8")
                    print(encoded)
                    reply += encoded + '\n'
            else:
                reply = ans[0].encode("utf8")
                print('Eric：' + reply)

        conn.sendall(reply)

예제 #10

파일 보기

파일: qa.py 프로젝트: pettertang/QA-Snake

def qa(question):
    """Answer one question by printing the reply to stdout.

    A new AIML kernel is created and the AIML resources next to this module
    are learned on every call. Nothing is returned; all output goes through
    ``print``. The working directory is left unchanged.

    Over-long (more than 60 characters) or blank input prints the kernel's
    canned reply and stops. Calls ``exit()`` when the segmented message
    equals ``'q'``.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    mybot = aiml.Kernel()
    resource_dir = os.path.split(os.path.realpath(__file__))[0] + "/resources/"
    # Load order is kept exactly as before.
    for aiml_file in (
        "std-startup.xml",
        "bye.aiml",
        "tools.aiml",
        "bad.aiml",
        "funny.aiml",
        "OrdinaryQuestion.aiml",
        "Common conversation.aiml",
    ):
        mybot.learn(resource_dir + aiml_file)

    print('''
.----------------.  .-----------------. .----------------.  .----------------.  .----------------.
| .--------------. || .--------------. || .--------------. || .--------------. || .--------------. |
| |    _______   | || | ____  _____  | || |      __      | || |  ___  ____   | || |  _________   | |
| |   /  ___  |  | || ||_   \|_   _| | || |     /  \     | || | |_  ||_  _|  | || | |_   ___  |  | |
| |  |  (__ \_|  | || |  |   \ | |   | || |    / /\ \    | || |   | |_/ /    | || |   | |_  \_|  | |
| |   '.___`-.   | || |  | |\ \| |   | || |   / /__\ \   | || |   |  __'.    | || |   |  _|  _   | |
| |  |`\____) |  | || | _| |_\   |_  | || | _/ /    \ \_ | || |  _| |  \ \_  | || |  _| |___/ |  | |
| |  |_______.'  | || ||_____|\____| | || ||____|  |____|| || | |____||____| | || | |_________|  | |
| |              | || |              | || |              | || |              | || |              | |
| '--------------' || '--------------' || '--------------' || '--------------' || '--------------' |
 '----------------'  '----------------'  '----------------'  '----------------'  '----------------'
 Eric：你好，我是Eric。╭(╯^╰)╮
    ''')

    input_message = question

    # Rejected input must stop here; previously the message was printed and
    # the over-long or blank question was processed anyway.
    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
        return
    if input_message.strip() == '':
        print(mybot.respond("无"))
        return

    print(input_message)
    message = T.wordSegment(input_message)
    print('word Seg:' + message)
    print('词性：')
    # The POS-tagging result is not used here; the call is kept in case
    # postag has side effects that are not visible from this function.
    T.postag(input_message)

    if message == 'q':
        exit()

    response = mybot.respond(message)

    print("=======")
    print(response)
    print("=======")

    if response == "":
        # The kernel had nothing to say: print its "not found" reply.
        ans = mybot.respond('找不到答案')
        print('Eric：' + ans)
        return

    if response[0] != '#':
        # Direct template match.
        print('Eric：' + response)
        return

    # '#'-prefixed responses request an external lookup. Default to "no
    # candidates" so a response with neither marker prints the "not found"
    # reply instead of raising UnboundLocalError.
    ans = []

    if "searchbaike" in response:
        print("searchbaike")
        print(response)
        res = response.split(':')
        # Entity and attribute are the 2nd and 3rd ':' fields.
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        print(entity + '<---->' + attr)

        ans = baike.query(entity, attr)
        if isinstance(ans, list):
            # Baike produced an answer. Stop here so it is not printed a
            # second time by the candidate handling below.
            print('Eric：' + QAT.ptranswer(ans, False))
            return
        elif u'::找不到' in ans.decode('utf-8'):
            # Baike miss: fall back to the generic summary search.
            print("通用搜索")
            ans = search_summary.kwquery(input_message)

    elif "NoMatchingTemplate" in response:
        # No template matched: go straight to the generic search.
        print("NoMatchingTemplate")
        ans = search_summary.kwquery(input_message)

    if len(ans) == 0:
        ans = mybot.respond('找不到答案')
        print('Eric：' + ans)
    elif len(ans) > 1:
        # Several candidates: print every one of them.
        print("不确定候选答案")
        print('Eric: ')
        for a in ans:
            print(a.encode("utf8"))
    else:
        print('Eric：' + ans[0].encode("utf8"))

예제 #11

파일 보기

파일: Server.py 프로젝트: WillXing/AnswerServer

def answer(question):
    """Return the list of answers for ``question``.

    Uses the module-level ``mybot`` kernel. The result is always a list of
    strings: one element for a template match, a Baike hit or a single search
    candidate, and one element per candidate when the search is ambiguous.

    Raises:
        Exception("Too Long"): the question has more than 600 characters.
        Exception("No Input"): the question is blank.
        Exception("No Answer"): the kernel returned an empty response or no
            candidate answer was found.

    Calls ``exit()`` when the segmented message equals ``'q'``.
    """
    if len(question) > 600:
        print(mybot.respond("句子长度过长"))
        raise Exception("Too Long")
    elif question.strip() == '':
        print(mybot.respond("无"))
        raise Exception("No Input")

    print(question)
    message = T.wordSegment(question)
    print('word Seg:' + message)
    print('词性：')
    # The POS-tagging result is not used here; the call is kept in case
    # postag has side effects that are not visible from this function.
    T.postag(question)

    if message == 'q':
        exit()

    response = mybot.respond(message)
    print(response)

    if response == "":
        raise Exception("No Answer")

    if response[0] != '#':
        # Direct template match.
        print('Eric：' + response)
        return [response]

    # '#'-prefixed responses request an external lookup. Default to "no
    # candidates" so a response with neither marker raises the documented
    # "No Answer" exception instead of UnboundLocalError.
    ans = []

    if "searchbaike" in response:
        print("searchbaike")
        print(response)
        res = response.split(':')
        # Entity and attribute are the 2nd and 3rd ':' fields.
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        print(entity + '<---->' + attr)

        ans = baike.query(entity, attr)
        if isinstance(ans, list):
            # Baike produced an answer: render it once, log it, return it.
            rendered = QAT.ptranswer(ans, False)
            print('Eric：' + rendered)
            return [rendered]
        elif u'::找不到' in ans.decode('utf-8'):
            # Baike miss: fall back to the generic summary search.
            print("通用搜索")
            ans = search_summary.kwquery(question)

    elif "NoMatchingTemplate" in response:
        # No template matched: go straight to the generic search.
        print("NoMatchingTemplate")
        ans = search_summary.kwquery(question)

    if len(ans) == 0:
        raise Exception("No Answer")
    elif len(ans) > 1:
        # Several candidates: log and return every one of them.
        print("不确定候选答案")
        print('Eric: ')
        candidates = []
        for a in ans:
            encoded = a.encode("utf8")
            print(encoded)
            candidates.append(encoded)
        return candidates
    else:
        only = ans[0].encode("utf8")
        print('Eric：' + only)
        return [only]