Ejemplos de TextProcess en Python

Lenguaje de programación: Python

Namespace/Package Name: Tools

Clase / Tipo: TextProcess

Ejemplos en hotexamples.com: 16

Python TextProcess - 16 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de Tools.TextProcess extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

postag(8)

jieba_initialize(4)

wordSegment(4)

cut(3)

load_baikeattr_name(2)

load_synonyms_word_inattr(2)

Ejemplo n.º 1

Mostrar archivo

Archivo: baike.py Proyecto: zyyyyy/MillionHeroAssistant

def query(entity, attr):
    """Look up one attribute of an entity in its Baidu Baike info box.

    The entity page is fetched with ``To.get_html_baidu`` and its
    ``basic-info cmn-clearfix`` block is converted to a mapping by
    ``get_info``.  The attribute name is decoded from UTF-8 before it is used
    as a key.  When the name is not present, a synonym is resolved through
    ``T.load_synonyms_word_inattr`` and the lookup is retried once.

    :param entity: entity name, appended verbatim to the Baike item URL
    :param attr: attribute name as a UTF-8 encoded byte string
    :return: the value stored in the info mapping, or ``attr + "::找不到"``
        when the page has no info box or neither the attribute nor its
        synonym is present (after a synonym lookup ``attr`` is the synonym)
    """
    soup = To.get_html_baidu("http://baike.baidu.com/item/" + entity)
    basicInfo_block = soup.find(class_='basic-info cmn-clearfix')
    if basicInfo_block is None:
        return attr + "::找不到"

    info = get_info(basicInfo_block)
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    key = attr.decode('utf8')
    if key in info:
        return info[key]

    # Direct lookup failed: resolve a synonym from the bundled dictionaries,
    # which live in the ``resources`` folder one level above this module.
    resources_dir = os.path.dirname(
        os.path.split(os.path.realpath(__file__))[0]) + '/resources'
    attr_list = T.load_baikeattr_name(resources_dir + '/Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(
        attr, resources_dir + '/SynonDic.txt', attr_list)
    key = attr.decode('utf8')
    if key in info:
        return info[key]
    return attr + "::找不到"

Ejemplo n.º 2

Mostrar archivo

def query(entity, attr):
    """Look up one attribute of an entity in its Baidu Baike info box.

    The entity page is fetched with ``To.get_html_baidu`` and its
    ``basic-info cmn-clearfix`` block is converted to a mapping by
    ``get_info``.  When the attribute is missing, a synonym is resolved
    through ``T.load_synonyms_word_inattr`` and the lookup is retried once.

    :param entity: entity name, appended verbatim to the Baike item URL
    :param attr: attribute name to look up
    :return: the value stored in the info mapping on success; otherwise a
        multi-line trace string describing which lookups failed
    """
    entity_uri = 'http://baike.baidu.com/item/' + entity
    result = '查询百科列表实体:' + entity_uri + '\n'
    soup = To.get_html_baidu(entity_uri)
    basicInfo_block = soup.find(class_='basic-info cmn-clearfix')
    if basicInfo_block is None:
        return result + entity + "-找不到\n"

    info = get_info(basicInfo_block)
    if attr in info:
        return info[attr]

    # Direct lookup failed: record it, then retry with a synonym taken from
    # the dictionaries in the ``resources`` folder one level above this module.
    result += '属性' + attr + '-找不到\n'
    resources_dir = os.path.dirname(
        os.path.split(os.path.realpath(__file__))[0]) + '/resources'
    attr_list = T.load_baikeattr_name(resources_dir + '/Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(
        attr, resources_dir + '/SynonDic.txt', attr_list)
    if attr in info:
        return info[attr]
    return result + '同义属性' + attr + '-找不到\n'

Ejemplo n.º 3

Mostrar archivo

 def _get_key_sentence(self, contents, query_cut):
     """
     Pick the sentences most likely to contain the answer.

     Every sentence is tokenized with ``TextProcess.cut``; stop words are
     dropped and the remaining tokens are de-duplicated.  Each token gets a
     TF-IDF style weight, the ``self.topic`` highest-weighted tokens become
     the topic words, and a sentence scores the sum of its token weights,
     each multiplied by 0.5 for appearing in ``query_cut`` plus 0.5 for
     being a topic word.

     :param contents: sequence of candidate sentences
     :param query_cut: keywords extracted from the question
     :return: up to ``self.n`` sentences from ``contents``, best score first
     """
     split_result = []  # de-duplicated token list of every sentence
     TF = {}
     IDF = {}
     for s in contents:
         word_list = list(
             set([word for word in TextProcess.cut(s)
                  if word not in self.stop_word]))
         split_result.append(word_list)
         # Tokens are unique within a sentence, so both counters end up
         # holding the number of sentences that contain the word.
         for word in word_list:
             TF[word] = TF.get(word, 0) + 1
             IDF[word] = IDF.get(word, 0) + 1

     vocab_size = len(TF)
     sentence_count = len(contents)
     TF_IDF = {}
     for k in TF:
         TF[k] = TF[k] / vocab_size
         IDF[k] = math.log(sentence_count / IDF[k])
         TF_IDF[k] = TF[k] * IDF[k]

     # A set gives O(1) membership tests in the scoring loop below.
     topic_word = set(
         sorted(TF_IDF, key=TF_IDF.get, reverse=True)[:self.topic])

     score = []
     for i, word_list in enumerate(split_result):
         # A single word, or one word plus a blank, is unlikely to be an
         # answer.  The tokens came out of a set, so the blank may sit at
         # either index; test membership instead of a fixed position.
         if len(word_list) <= 1 or (len(word_list) == 2
                                    and " " in word_list):
             continue
         s = 0.
         for word in word_list:
             w = 0
             if word in query_cut:
                 w += 0.5
             if word in topic_word:
                 w += 0.5
             s += TF_IDF[word] * w
         score.append((i, s))

     score.sort(key=lambda x: x[1], reverse=True)
     return [contents[index] for index, _ in score[:self.n]]

Ejemplo n.º 4

Mostrar archivo

Archivo: codesearch.py Proyecto: yangyuBUAA/rabbitQA

    def _get_key_sentence(self, contents):
        """
        Rank candidate sentences and return the best ones as the answer.

        Sentences are tokenized with ``TextProcess.cut`` and stripped of stop
        words.  A sentence's base score is the mean TF*IDF weight of its
        tokens; it is then scaled by the share of tokens accepted by
        ``self._judge_pure_english``, and sentences with no such token score
        zero.  Each selected sentence is also printed with its score.

        :param contents: collection of candidate sentences
        :return: list of at most ``self.n`` sentences, highest score first
        """
        tokenized = []  # token list of every sentence, stop words removed
        TF = {}
        IDF = {}
        for sentence in contents:
            tokens = [
                tok for tok in TextProcess.cut(sentence)
                if tok not in self.stop_word
            ]
            tokenized.append(tokens)
            for tok in tokens:
                TF[tok] = TF.get(tok, 0) + 1
            # Count sentences containing the token, not occurrences.
            for tok in set(tokens):
                IDF[tok] = IDF.get(tok, 0) + 1
        for key in TF:
            TF[key] = TF[key] / len(TF)
            IDF[key] = math.log(len(contents) / IDF[key])

        ranked = []
        for idx, tokens in enumerate(tokenized):
            token_count = len(tokens)
            # One word, or one word followed by a blank, is unlikely to be
            # an answer.
            if token_count <= 1 or (token_count == 2 and tokens[1] == " "):
                continue

            total = 0.
            english_count = 0.  # tokens that are pure English
            for tok in tokens:
                if self._judge_pure_english(tok):
                    english_count += 1
                total += TF[tok] * IDF[tok]

            if english_count == 0:
                # Not a single English token: cannot be a code answer.
                total = 0
            else:
                english_ratio = english_count / token_count
                total = total / token_count
                if english_ratio > 0.5:
                    # Mostly English/symbols: boost the score.
                    total = total * (1. + english_ratio)
                else:
                    total = total * english_ratio
            ranked.append((idx, total))

        ranked = sorted(ranked, key=lambda item: item[1], reverse=True)
        if len(ranked) > self.n:
            ranked = ranked[:self.n]

        result = []
        for idx, value in ranked:
            print(contents[idx], value)
            result.append(contents[idx])
        return result

Ejemplo n.º 5

Mostrar archivo

Archivo: SqliteQA.py Proyecto: yangyuBUAA/rabbitQA

 def _similarity(self, t1, t2):
     """
     Score the similarity of two texts from their sentence embeddings.

     The original note on this method says the Baidu API has a QPS problem.

     :param t1: first text
     :param t2: second text
     :return: tuple ``(1, score)`` where ``score`` is the result of
         ``self.cos`` rescaled as ``value * 0.5 + 0.5``
     """
     # Tokenize both texts first, dropping stop words, then embed them.
     kept_tokens = [
         [tok for tok in TextProcess.cut(text) if tok not in self.stop_word]
         for text in (t1, t2)
     ]
     first_emb, second_emb = [
         self.sentence_emb(tokens) for tokens in kept_tokens
     ]
     cosine = self.cos(first_emb, second_emb)
     # Normalisation step (presumably maps a cosine in [-1, 1] onto [0, 1]).
     return 1, cosine * 0.5 + 0.5

Ejemplo n.º 6

Mostrar archivo

def qa(question):
    """Answer one question with the AIML bot, falling back to web search.

    The function builds a fresh ``aiml.Kernel``, loads the bundled rule
    files, segments the question with ``T.wordSegment`` and asks the kernel
    for a response.  A response starting with ``#`` is treated as a command:
    ``searchbaike`` triggers ``baike.query`` and ``NoMatchingTemplate``
    triggers ``search_summary.kwquery``.  Results are printed or logged; the
    function has no ``return`` statement.

    ``robot_id``, ``log``, ``logs``, ``QAT``, ``baike`` and
    ``search_summary`` are not defined in this block and must be provided by
    the enclosing module.

    :param question: the user's question text
    """

    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Corpus working directory; the chdir itself is disabled.
    mybot_path = './'
    # os.chdir(mybot_path)

    # Load the AIML rule files stored in ``resources`` next to this module.
    mybot = aiml.Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/OrdinaryQuestion.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/Common conversation.aiml")
    # mybot.respond('Load Doc Snake')

    input_message = question

    # Over-long (more than 60 characters) or blank input only prints the
    # bot's canned reply; there is no early exit, so processing continues.
    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
    elif input_message.strip() == '':
        print(mybot.respond("无"))

    print(input_message)
    message = T.wordSegment(input_message)
    # Show the segmented text.
    print('word Seg:' + message)
    print('词性：')
    # The part-of-speech result is not used further in this function.
    words = T.postag(input_message)
    if message == 'q':
        exit()
    else:
        # Ask the AIML templates first.
        response = mybot.respond(message)

        print("=======")
        print(response)
        print("=======")

        if response == "":
            # No template produced anything: reply with the canned
            # "answer not found" response.
            ans = mybot.respond('找不到答案')
            # print(robot_id + ":" + ans)
            print("{0}:{1}".format(robot_id, ans))
        # Responses starting with '#' are search commands, not answers.
        elif response[0] == '#':
            # Encyclopedia (Baike) lookup.
            if response.__contains__("searchbaike"):
                print("search from baike")
                print(response)
                res = response.split(':')
                # Entity name is the second ':'-separated field.
                entity = str(res[1]).replace(" ", "")
                # Attribute name is the third ':'-separated field.
                attr = str(res[2]).replace(" ", "")
                print(entity + '<---->' + attr)

                ans = baike.query(entity, attr)
                # A list result means the lookup hit an answer.
                if type(ans) == list:
                    print("{0}:{1}".format(robot_id, QAT.ptranswer(ans,
                                                                   False)))
                elif ans.decode('utf-8').__contains__(u'::找不到'):
                    # Not found in Baike: fall back to the Baidu + Bing
                    # summary search.
                    print("通用搜索")
                    # NOTE(review): this uses ``log`` while the code below
                    # uses ``logs`` - confirm both names exist.
                    log.info("通用搜索")
                    ans = search_summary.kwquery(input_message)

            # No template matched: run the generic summary search.
            elif response.__contains__("NoMatchingTemplate"):
                print("NoMatchingTemplate")
                ans = search_summary.kwquery(input_message)

            # NOTE(review): ``ans`` is only assigned in the two branches
            # above; a '#' response containing neither marker would reach
            # this point with ``ans`` unbound - confirm this cannot happen.
            if len(ans) == 0:
                ans = mybot.respond('找不到答案')
                logs.info("{0}:{1}".format(robot_id, ans))
            elif len(ans) > 1:
                # Several candidates: log the situation and print them all.
                logs.info(sys.exc_info())
                logs.info("不确定候选答案")
                logs.info("[{0}][func:{1}][line:{2}]:不确定候选答案".format(
                    sys._getframe().f_code.co_filename,
                    sys._getframe().f_code.co_name,
                    sys._getframe().f_lineno))
                print(robot_id + ': ')
                for a in ans:
                    print(a)
                    # print(a.encode("utf8"))
            else:
                print('{0}:{1}'.format(robot_id, ans[0]))

        # Any other response is a direct template answer.
        else:
            print("{}: {}".format(robot_id, response))

Ejemplo n.º 7

Mostrar archivo

def main():
    """Run the interactive console loop of the AIML chat bot.

    Initialises the tokenizer, loads the AIML rule files stored in the
    ``resources`` folder next to this module, prints a greeting and then
    keeps reading questions from the console.  Over-long (more than 60
    characters) and blank inputs get a canned reply; any other input is
    segmented, sent to the AIML kernel and the response is printed together
    with its decoded length.  Entering a question that segments to ``q``
    exits the program.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Switch to the corpus working directory.
    mybot_path = './'
    os.chdir(mybot_path)

    # Load the AIML rules, in this order.
    mybot = aiml.Kernel()
    rule_files = (
        "/resources/std-startup.xml",
        "/resources/abc.aiml",
        "/resources/bot_profile.aiml",
        "/resources/infor.aiml",
        "/resources/main.aiml",
        "/resources/new07281.aiml",
        "/resources/salutations.aiml",
        "/resources/virus0727.aiml",
    )
    for rule_file in rule_files:
        mybot.learn(os.path.split(os.path.realpath(__file__))[0] + rule_file)

    print('泰康小康：你好，我是小康。╭(╯^╰)╮')

    # Question-handling loop.
    while True:
        input_message = raw_input("您想问什么 >> ")

        # Reject over-long and blank questions with a canned reply.
        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
            continue
        if input_message.strip() == '':
            print(mybot.respond("无"))
            continue

        # Pre-process the question with the Tools helpers.
        print(input_message)
        message = T.wordSegment(input_message)
        print('word Seg:' + message)
        # The part-of-speech result is not used, but the call is kept.
        T.postag(input_message)

        if message != 'q':
            # AIML template matching comes first.
            response = mybot.respond(message)

            print("=======")
            print(response)
            print(len(response.decode('utf8')))
            print("=======")
        else:
            # 'q' quits the program.
            exit()

Ejemplo n.º 8

Mostrar archivo

def kwquery(query):
    """Search Baidu, then Bing, for an answer to ``query``.

    The first ten Baidu results are inspected in order.  The loop stops at
    the first result that yields an answer: a direct-answer card, a poem,
    calendar or calculator card (first result only), a Baidu Zhidao best
    answer, a Baidu Zhidao page or a Baidu Baike summary.  When Baidu gives
    nothing, Bing's knowledge box and Bing Wangdian entries are tried.  If
    everything fails, the cleaned text of the Baidu results visited so far
    is returned instead.

    ``T``, ``To``, ``quote`` and ``clean_str`` are not defined in this block
    and must be provided by the enclosing module.

    :param query: the question text
    :return: tuple ``(answer, log)``; ``answer`` is a list of strings and
        ``log`` is a text trace of the steps taken
    """
    # Segment the query and extract keywords.
    log = '提取关键词:'
    # NOTE: ``keywords`` is filled here but only read by the disabled code
    # near the end of the function.
    keywords = []
    words = T.postag(query)
    for k in words:
        # Keep only tokens whose POS flag contains "n" (nouns, per the
        # original comment).
        if k.flag.__contains__("n"):
            keywords.append(k.word)
            log += k.word
    log += '#' * 50 + '\n'
    answer = []
    text = []
    # Set to 1 once an answer has been found.
    flag = 0

    # Fetch the summaries of the first ten Baidu results.
    log += '百度前10条的摘要'
    url = 'https://www.baidu.com/s?wd=' + quote(query)
    log += url + '#' * 50 + '\n'
    soup_baidu = To.get_html_baidu(url)

    for i in range(1, 11):
        if soup_baidu == None:
            break
        # Each result is looked up by its numeric element id.
        results = soup_baidu.find(id=i)

        if results == None:
            log += '百度摘要找不到答案' + '#' * 50 + '\n'
            break
        log += '第' + str(i) + '条摘要:\n'
        log += clean_str(results.get_text()) + '#' * 50 + '\n'
        # A result with a 'mu' attribute and an exact-answer element is
        # taken as a direct hit (knowledge graph, per the original comment).
        if 'mu' in results.attrs and results.find(
                class_='op_exactqa_s_answer') != None:
            r = results.find(class_='op_exactqa_s_answer')
            log += '第一条百度摘要为百度搜索根据知识图谱直接匹配出的内容，优先查找\n'
            log += '百度知识图谱找到答案' + '#' * 50 + '\n'
            answer.append(r.get_text().strip())
            flag = 1
            break

        # Classical poetry card (first result only).
        if 'mu' in results.attrs and i == 1 and results.find(
                class_="op_exactqa_detail_s_answer") != None:
            r = results.find(class_="op_exactqa_detail_s_answer")
            log += '百度诗词找到答案' + '#' * 50 + '\n'
            answer.append(r.get_text().strip())
            flag = 1
            break

        # Perpetual calendar / date card (first result only).
        if 'mu' in results.attrs and i == 1 and results.attrs[
                'mu'].__contains__('http://open.baidu.com/calendar'):
            r = results.find(class_="op-calendar-content")
            if r != None:
                log += '百度万年历找到答案' + '#' * 50 + '\n'
                answer.append(r.get_text().strip().replace("\n", "").replace(
                    " ", ""))
                flag = 1
                break

        # Newer calendar card: the answer is taken from the 'fk' attribute
        # with its "6018_" prefix removed.
        if 'tpl' in results.attrs and i == 1 and results.attrs[
                'tpl'].__contains__('calendar_new'):
            r = results.attrs['fk'].replace("6018_", "")
            if r != None:
                log += '百度万年历新版找到答案' + '#' * 50 + '\n'
                answer.append(r)
                flag = 1
                break

        # Calculator card (first result only).
        if 'mu' in results.attrs and i == 1 and results.attrs[
                'mu'].__contains__(
                    'http://open.baidu.com/static/calculator/calculator.html'):
            r = results.find(class_="op_new_val_screen_result")
            if r != None:
                log += '计算器找到答案' + '#' * 50 + '\n'
                answer.append(r.get_text().strip())
                flag = 1
                break

        # Baidu Zhidao "best answer" link: follow it and read the answer
        # from the linked page.
        if 'mu' in results.attrs and results.find(
                class_='op_best_answer_question_link') != None:
            r = results.find(class_='op_best_answer_question_link')
            url = r['href']
            zhidao_soup = To.get_html_zhidao(url)
            # NOTE(review): the 'bd answer' lookup is not checked for None
            # here, unlike the Zhidao branch below - confirm it is safe.
            r = zhidao_soup.find(class_='bd answer').find('pre')
            if r == None:
                r = zhidao_soup.find(class_='bd answer').find(
                    class_='line content')
            log += '百度知道best answer找到答案' + '#' * 50 + '\n'
            answer.append(r.get_text())
            flag = 1
            break

        if results.find("h3") != None:
            # Result whose title mentions Baidu Zhidao.
            if results.find("h3").find("a").get_text().__contains__(u"百度知道"):
                url = results.find("h3").find("a")['href']
                if url == None:
                    log += '百度知道找不到答案' + '#' * 50 + '\n'
                    continue
                else:
                    log += '百度知道找到答案' + '#' * 50 + '\n'
                    zhidao_soup = To.get_html_zhidao(url)
                    r = zhidao_soup.find(class_='bd answer')
                    if r == None:
                        continue
                    else:
                        # Prefer the <pre> body, else the 'line content'
                        # element.
                        r = r.find('pre')
                        if r == None:
                            r = zhidao_soup.find(class_='bd answer').find(
                                class_='line content')
                    answer.append(r.get_text().strip())
                    flag = 1
                    break

            # Result whose title mentions Baidu Baike: use the lemma summary.
            if results.find("h3").find("a").get_text().__contains__(u"百度百科"):
                url = results.find("h3").find("a")['href']
                if url == None:
                    log += '百度百科找不到答案' + '#' * 50 + '\n'
                    continue
                else:
                    log += '百度百科找到答案' + '#' * 50 + '\n'
                    baike_soup = To.get_html_baike(url)

                    r = baike_soup.find(class_='lemma-summary')
                    if r == None:
                        continue
                    else:
                        r = r.get_text().replace("\n", "").strip()
                    answer.append(r)
                    flag = 1
                    break
        # No answer in this result: remember its cleaned text and first
        # link for the final fallback.
        text.append(
            clean_str(results.get_text()) + "(" + results.find("a")['href'] +
            ")")

    if flag == 1:
        return (answer, log)
    log += '百度前十条摘要不包含百度知道、百度百科内容，或相关内容中找不到答案' + '#' * 50 + '\n'

    # Fall back to Bing.
    log += '通过Bing查找\n'
    url = 'https://www.bing.com/search?q=' + quote(query)
    log += url + '\n'
    log += '#' * 50
    soup_bing = To.get_html_bing(url)
    # Check whether Bing shows a knowledge box.
    # bingbaike = soup_bing.find(class_="b_xlText b_emphText")
    bingbaike = soup_bing.find(class_="bm_box")

    if bingbaike != None:
        # NOTE(review): indexing [1] assumes at least two 'b_vList'
        # elements exist - confirm, otherwise this raises IndexError.
        if bingbaike.find_all(class_="b_vList")[1] != None:
            if bingbaike.find_all(class_="b_vList")[1].find("li") != None:
                log += 'Bing百科找到答案' + '#' * 50 + '\n'
                flag = 1
                answer.append(bingbaike.get_text())
                return (answer, log)
    else:
        log += 'Bing百科找不到答案' + '#' * 50 + '\n'
        results = soup_bing.find(id="b_results")
        bing_list = results.find_all('li')
        for bl in bing_list:
            temp = bl.get_text()
            # Only Bing Wangdian entries are followed.
            if temp.__contains__(u" - 必应网典"):
                url = bl.find("h2").find("a")['href']
                if url == None:
                    log += 'Bing网典找不到答案' + '#' * 50 + '\n'
                    continue
                else:
                    log += 'Bing网典找到答案' + '#' * 50 + '\n'
                    bingwd_soup = To.get_html_bingwd(url)

                    r = bingwd_soup.find(class_='bk_card_desc').find("p")
                    if r == None:
                        continue
                    else:
                        r = r.get_text().replace("\n", "").strip()
                    answer.append(r)
                    flag = 1
                    break

        if flag == 1:
            return (answer, log)

    log += '没有找到答案，返回百度前十条摘要内容\n'
    # A disabled post-processing stage used to sit here: it split the
    # collected text into sentences, kept those containing a keyword,
    # counted person names ("nr" POS flag) and returned the three most
    # frequent names that were not query keywords.  It is not executed;
    # the collected Baidu result texts are returned as they are.
    answer = text

    return (answer, log)

Ejemplo n.º 9

Mostrar archivo

Archivo: main.py Proyecto: zyyyyy/MillionHeroAssistant

def main():
    """Run the quiz-assistant loop: on each Enter, read the screen and answer.

    Sets up the tokenizer and an AIML kernel loaded with the bundled rule
    files, optionally starts a browser helper process, then repeatedly waits
    for console input and runs one recognition/answer round per input.

    ``parse_args``, ``enable_chrome``, ``run_browser``, ``data_directory``,
    ``image_compress_level`` and the other helpers called below are not
    defined in this block and must be provided by the enclosing module.
    """
    args = parse_args()
    timeout = args.timeout

    # Initialise the jieba tokenizer.
    T.jieba_initialize()

    # Switch to the corpus working directory.
    mybot_path = './'
    os.chdir(mybot_path)

    # Load the AIML rule files stored in ``resources`` next to this module.
    mybot = Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/OrdinaryQuestion.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/Common conversation.aiml")

    if enable_chrome:
        # Shared string value through which the current question is handed
        # to the browser process, which runs as a daemon.
        question_obj = Value(ctypes.c_char_p, "".encode("utf-8"))
        browser_daemon = multiprocessing.Process(target=run_browser,
                                                 args=(question_obj, ))
        browser_daemon.daemon = True
        browser_daemon.start()

    def __inner_job():
        """Run one round: capture the screen, rank the options, ask the bot.

        Reads ``mybot``, ``timeout`` and (when ``enable_chrome`` is set)
        ``question_obj`` from the enclosing ``main`` scope.  Everything is
        printed; nothing is returned.
        """
        start = time.time()
        # Capture the current screen and extract its text.
        text_binary = analyze_current_screen_text(
            directory=data_directory, compress_level=image_compress_level[0])

        keywords = get_text_from_image(image_data=text_binary, )
        if not keywords:
            print("text not recognize")
            return

        true_flag, question, answers = parse_question_and_answer(keywords)
        #questions=question.decode('unicode-escape')
        #new_ans=[]
        #for ans in answers:
        # new_ans.append(ans.decode('unicode-escape'))

        print('-' * 72)
        print(question)
        print('-' * 72)
        print("\n".join(answers))

        # Notify the browser process of the new question.
        if enable_chrome:
            with question_obj.get_lock():
                question_obj.value = question
                keyboard.press("space")

        # Rank the answer options by their Baidu count, highest first.
        search_question = pre_process_question(question)
        summary = baidu_count(search_question, answers, timeout=timeout)
        summary_li = sorted(summary.items(),
                            key=operator.itemgetter(1),
                            reverse=True)
        data = [("选项", "同比")]
        for a, w in summary_li:
            data.append((a, w))
        table = AsciiTable(data)
        print(table.table)

        # Print the top- and bottom-ranked options; ``true_flag`` decides
        # which of the two lines carries the ``**`` marker.
        print("*" * 72)
        if true_flag:
            print("肯定回答(**)： ", summary_li[0][0])
            print("否定回答(  )： ", summary_li[-1][0])
        else:
            print("肯定回答(  )： ", summary_li[0][0])
            print("否定回答(**)： ", summary_li[-1][0])
        print("*" * 72)

        ##############################################################
        # Second opinion: run the question through the AIML bot.
        input_message = question

        # Over-long or blank input only prints the canned reply; there is
        # no early exit, so processing continues below.
        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
        elif input_message.strip() == '':
            print(mybot.respond("无"))

        #print(input_message)
        message = T.wordSegment(input_message)
        #print('word Seg:' + message)
        # The part-of-speech result is not used further in this function.
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            #print("=======")
            #print(response)
            #print("=======")

            if response == "":
                ans = mybot.respond('找不到答案')
                print('Eric：' + ans)
            # Responses starting with '#' are search commands, not answers.
            elif response[0] == '#':
                # Encyclopedia (Baike) lookup.
                if response.__contains__("searchbaike"):
                    #print("searchbaike")
                    #print(response)
                    res = response.split(':')
                    # Entity name is the second ':'-separated field.
                    entity = str(res[1]).replace(" ", "")
                    # Attribute name is the third ':'-separated field.
                    attr = str(res[2]).replace(" ", "")
                    #print(entity + '<---->' + attr)

                    ans = baike.query(entity, attr)
                    # A list result means the lookup hit an answer.
                    if type(ans) == list:
                        print('Eric：' + QAT.ptranswer(ans, False))

                    elif ans.decode('utf-8').__contains__(u'::找不到'):
                        # Not found in Baike: fall back to the Baidu + Bing
                        # summary search.
                        print("通用搜索")
                        ans = search_summary.kwquery(input_message)

                # No template matched: run the generic summary search.
                # NOTE(review): this call passes ``answers`` as a second
                # argument while the call above passes one - confirm the
                # signature of ``search_summary.kwquery``.
                elif response.__contains__("NoMatchingTemplate"):
                    #print("NoMatchingTemplate")
                    ans = search_summary.kwquery(input_message, answers)

                # NOTE(review): ``ans`` is only assigned in the two branches
                # above; a '#' response containing neither marker would
                # reach this point with ``ans`` unbound - confirm.
                if len(ans) == 0:
                    print('Eric：' + '找不到答案')
                elif len(ans) > 1:
                    print("不确定候选答案")
                    print('Eric: ')
                    for a in ans:
                        print(a)
                else:
                    print('Eric：' + ans[0])

            # Any other response is a direct template answer.
            else:
                print('Eric：' + response)

        end = time.time()
        print("use {0} 秒".format(end - start))
        save_screen(directory=data_directory)

    while True:
        print("""
    请在答题开始前就运行程序，
    答题开始的时候按Enter预测答案
                """)

        enter = input("按Enter键开始，按ESC键退出...")
        print(enter)

        # chr(27) is the ESC character.
        if enter == chr(27):
            break
        # Any exception raised by a round is printed and the loop goes on.
        try:
            __inner_job()
        except Exception as e:
            print(str(e))

        # This line is inside the loop, so it is printed after every round.
        print("欢迎下次使用")

Ejemplo n.º 10

Mostrar archivo

Archivo: main.py Proyecto: zyyyyy/MillionHeroAssistant

    def __inner_job():
        """Run one round: capture the screen, rank the options, ask the bot.

        This is a nested function; ``mybot``, ``timeout``, ``question_obj``
        and the helpers called below are not defined in this block and come
        from the enclosing scope or module.  Everything is printed; nothing
        is returned.
        """
        start = time.time()
        # Capture the current screen and extract its text.
        text_binary = analyze_current_screen_text(
            directory=data_directory, compress_level=image_compress_level[0])

        keywords = get_text_from_image(image_data=text_binary, )
        if not keywords:
            print("text not recognize")
            return

        true_flag, question, answers = parse_question_and_answer(keywords)
        #questions=question.decode('unicode-escape')
        #new_ans=[]
        #for ans in answers:
        # new_ans.append(ans.decode('unicode-escape'))

        print('-' * 72)
        print(question)
        print('-' * 72)
        print("\n".join(answers))

        # Notify the browser process of the new question.
        if enable_chrome:
            with question_obj.get_lock():
                question_obj.value = question
                keyboard.press("space")

        # Rank the answer options by their Baidu count, highest first.
        search_question = pre_process_question(question)
        summary = baidu_count(search_question, answers, timeout=timeout)
        summary_li = sorted(summary.items(),
                            key=operator.itemgetter(1),
                            reverse=True)
        data = [("选项", "同比")]
        for a, w in summary_li:
            data.append((a, w))
        table = AsciiTable(data)
        print(table.table)

        # Print the top- and bottom-ranked options; ``true_flag`` decides
        # which of the two lines carries the ``**`` marker.
        print("*" * 72)
        if true_flag:
            print("肯定回答(**)： ", summary_li[0][0])
            print("否定回答(  )： ", summary_li[-1][0])
        else:
            print("肯定回答(  )： ", summary_li[0][0])
            print("否定回答(**)： ", summary_li[-1][0])
        print("*" * 72)

        ##############################################################
        # Second opinion: run the question through the AIML bot.
        input_message = question

        # Over-long or blank input only prints the canned reply; there is
        # no early exit, so processing continues below.
        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
        elif input_message.strip() == '':
            print(mybot.respond("无"))

        #print(input_message)
        message = T.wordSegment(input_message)
        #print('word Seg:' + message)
        # The part-of-speech result is not used further in this function.
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            #print("=======")
            #print(response)
            #print("=======")

            if response == "":
                ans = mybot.respond('找不到答案')
                print('Eric：' + ans)
            # Responses starting with '#' are search commands, not answers.
            elif response[0] == '#':
                # Encyclopedia (Baike) lookup.
                if response.__contains__("searchbaike"):
                    #print("searchbaike")
                    #print(response)
                    res = response.split(':')
                    # Entity name is the second ':'-separated field.
                    entity = str(res[1]).replace(" ", "")
                    # Attribute name is the third ':'-separated field.
                    attr = str(res[2]).replace(" ", "")
                    #print(entity + '<---->' + attr)

                    ans = baike.query(entity, attr)
                    # A list result means the lookup hit an answer.
                    if type(ans) == list:
                        print('Eric：' + QAT.ptranswer(ans, False))

                    elif ans.decode('utf-8').__contains__(u'::找不到'):
                        # Not found in Baike: fall back to the Baidu + Bing
                        # summary search.
                        print("通用搜索")
                        ans = search_summary.kwquery(input_message)

                # No template matched: run the generic summary search.
                # NOTE(review): this call passes ``answers`` as a second
                # argument while the call above passes one - confirm the
                # signature of ``search_summary.kwquery``.
                elif response.__contains__("NoMatchingTemplate"):
                    #print("NoMatchingTemplate")
                    ans = search_summary.kwquery(input_message, answers)

                # NOTE(review): ``ans`` is only assigned in the two branches
                # above; a '#' response containing neither marker would
                # reach this point with ``ans`` unbound - confirm.
                if len(ans) == 0:
                    print('Eric：' + '找不到答案')
                elif len(ans) > 1:
                    print("不确定候选答案")
                    print('Eric: ')
                    for a in ans:
                        print(a)
                else:
                    print('Eric：' + ans[0])

            # Any other response is a direct template answer.
            else:
                print('Eric：' + response)

        end = time.time()
        print("use {0} 秒".format(end - start))
        save_screen(directory=data_directory)

Ejemplo n.º 11

Mostrar archivo

from Tools import TextProcess as T

from QuestionParser import aiml_parse
from AnswerGeneration import aiml_generate
import time

# Create the AIML kernel and load every rule file found next to this module
# under ./resources, in the original order.
mybot = aiml.Kernel()
_RESOURCE_DIR = os.path.split(os.path.realpath(__file__))[0] + "/resources/"
for _aiml_file in (
        "std-startup.xml",
        "bye.aiml",
        "tools.aiml",
        "bad.aiml",
        "funny.aiml",
        "OrdinaryQuestion.aiml",
        "Common conversation.aiml",
):
    mybot.learn(_RESOURCE_DIR + _aiml_file)

# One-time jieba setup through the project's TextProcess helper.
T.jieba_initialize()


def qa(question, T, mybot, QAT):
    """Answer ``question`` by parsing it and then generating a reply.

    The question is handed to ``aiml_parse.aiml_question_parsing`` together
    with ``T`` and ``mybot``. The parse result, ``mybot``, ``QAT`` and the
    original question are then handed to
    ``aiml_generate.aiml_answer_generate``, whose result is returned as is.
    """
    parsed_question = aiml_parse.aiml_question_parsing(question, T, mybot)
    return aiml_generate.aiml_answer_generate(
        parsed_question, mybot, QAT, question)

def code_format(s):
    """Return ``s`` encoded as UTF-8, or ``s`` itself when it cannot be encoded.

    :param s: value to encode; anything exposing ``encode('utf8')``.
    :return: the encoded value, or ``s`` unchanged if it has no usable
        ``encode`` method or the encoding fails.
    """
    try:
        return s.encode('utf8')
    except (AttributeError, TypeError, UnicodeError):
        # Best effort: values that cannot be encoded are passed through.
        # Only encoding-related errors are caught, so KeyboardInterrupt and
        # SystemExit are no longer swallowed as with the former bare except.
        return s

Ejemplo n.º 12

Mostrar archivo

def find_ans(question=''):
    """Answer ``question`` and describe where the answer came from.

    Sources are tried in this order: the local knowledge base
    (``Client.qa_find_ans``), the AIML templates (``mybot.respond``), the
    Baike infobox (``baike.query``) and the search summaries
    (``search_summary.kwquery``).

    Reads and writes the module-level flag ``raw``: when several search
    summaries come back and the question is not already a "什么是"/"是什么"
    question handled with ``raw == False``, ``raw`` is set to ``False`` and
    the question is retried in both of those phrasings.

    :param question: the user's question.
    :return: ``(answer, log)``; ``log`` is a text trace of the numbered
        steps that were taken.
    """
    global raw
    log = '答案来源：'
    cnt = 0
    input_message = question
    if len(input_message) > 60:
        return (mybot.respond("句子长度过长"), log)
    elif input_message.strip() == '':
        return (mybot.respond("无"), log)

    # Step 1: local knowledge base (per the original note, that code lives
    # in the QA1 folder).
    cnt += 1
    log += str(cnt) + ':检索本地知识库\n'
    old_client = Client()
    ans, log_tmp = old_client.qa_find_ans(input_message)
    log += log_tmp
    if ans != "不知道~":
        return (clean_str(ans), log)
    log += '本地知识库找不到答案或答案不确定\n'

    # Step 2: AIML templates on the segmented question.
    message = T.wordSegment(input_message)
    response = mybot.respond(message)
    if response == "":
        ans = mybot.respond('不知道~')
        return (ans, log)

    if response[0] != '#':
        # The question template matched directly.
        cnt += 1
        log += str(cnt) + ':匹配问句模板\n'
        return (clean_str(response), log)

    # A response starting with '#' is an instruction, not an answer.
    cnt += 1
    log += str(cnt) + ':匹配不到问句模板\n'
    # Start from "no answers": previously a '#' response carrying neither
    # marker left the knowledge-base string in ``ans``, which was then
    # treated as a list of answers and iterated character by character.
    ans = []
    if "searchbaike" in response:
        res = response.split(':')
        entity = str(res[1]).replace(" ", "")
        attr = str(res[2]).replace(" ", "")
        cnt += 1
        log += str(cnt) + ':匹配到实体属性模板,' + '实体:' + entity + ' 属性:' + attr + '\n'
        ans = baike.query(entity, attr)
        if isinstance(ans, list):
            cnt += 1
            log += str(cnt) + ':来自百科Infobox\n'
            return (QAT.ptranswer(ans, False), log)
        elif '-找不到' in ans:
            cnt += 1
            log += str(cnt) + ':百科Infobox查询不到:' + ans + '\n'
            cnt += 1
            log += str(cnt) + '来自搜索\n'
            (ans, tmplog) = search_summary.kwquery(input_message)
            log += tmplog
        # NOTE(review): any other non-list value returned by baike.query
        # falls through to the length checks below unchanged - confirm
        # what baike.query can return.
    elif "NoMatchingTemplate" in response:
        cnt += 1
        log += str(cnt) + ':匹配不到实体关系模板\n'
        cnt += 1
        log += str(cnt) + ':来自搜索\n'
        (ans, tmplog) = search_summary.kwquery(input_message)
        log += tmplog

    if len(ans) == 0:
        cnt += 1
        log += str(cnt) + ':未查询到答案\n'
        return (mybot.respond('不知道~'), log)

    if len(ans) == 1:
        return (clean_str(ans[0]), log)

    # Several candidates: these are search summaries.
    cnt += 1
    log += str(cnt) + ':返回百度摘要\n'
    if raw == False and ('什么是' in question or '是什么' in question):
        result = "给你找到几篇新闻：" + "".join(a + '\n' for a in ans)
        return (result, log)

    # Retry the bare topic in both definition phrasings. The "什么是..."
    # form wins unless it only produced a news list; the "...是什么" form is
    # looked up only in that case (it used to be computed unconditionally).
    raw = False
    question = question.replace("是什么", "").replace("什么是", "")
    ans1, log1 = find_ans("什么是" + question)
    if "给你找到几篇新闻" not in ans1:
        return (ans1, log1)
    return find_ans(question + "是什么")

Ejemplo n.º 13

Mostrar archivo

Archivo: SqliteQA.py Proyecto: yangyuBUAA/rabbitQA

    def respond(self, text):
        """Return the stored QA pairs that best match ``text``.

        The query is segmented with ``TextProcess.postag``. Its nouns (or,
        when there are none, its verbs) that are not stop words become
        keywords. Rows of ``qa_pair`` whose QUESTION contains every keyword
        are scored with ``self._similarity``, and only rows scoring above
        0.9 are kept.

        :param text: the user's query string.
        :return: list of ``(question, answer)`` tuples sharing the highest
            similarity score; empty if there are no keywords or no row
            scores above 0.9.
        :raises Exception: when ``self._similarity`` reports state ``0``.
        """
        question = list(TextProcess.postag(text))  # (word, tag) pairs
        logger.global_logger.info("query: {}  cut:{}".format(text, question))

        def pick(keep_tag):
            # Words outside the stop list whose POS tag is accepted.
            return [
                word for word, tag in question
                if word not in self.stop_words and keep_tag(tag)
            ]

        # Prefer nouns (any tag containing 'n' except the tag "un") so the
        # described object stays close; fall back to verbs otherwise.
        keywords = pick(lambda tag: 'n' in tag and tag != "un")
        if not keywords:
            keywords = pick(lambda tag: 'v' in tag)
        if not keywords:
            return []

        # instr() is faster than LIKE '%kw%'. Keywords come from user input,
        # so they are bound as parameters instead of being formatted into
        # the statement (assumes the qmark paramstyle of sqlite3 - confirm
        # that self.cursor is a sqlite3 cursor).
        sql = "select QUESTION ,ANSWER from qa_pair where {}".format(
            " and ".join(["instr(QUESTION, ?) > 0"] * len(keywords)))
        logger.global_logger.info(
            "going to execute this sql: {} params: {}".format(sql, keywords))
        result = self.cursor.execute(sql, keywords)

        # Score every candidate question against the query.
        res = []
        for row in result:
            q, a = row[0], row[1]
            state, sim = self._similarity(text, q)
            if state == 0:
                raise Exception("similarity Api Error.")
            if sim > 0.9:
                res.append((q, a, sim))
        if not res:
            return []

        # Keep every pair tied for the best score, in retrieval order.
        best_score = max(item[2] for item in res)
        finall = [(q, a) for q, a, sim in res if sim == best_score]
        logger.global_logger.info(
            "[MATCH RESULT]{} match:{} score:{}".format(
                text, finall[0][0], best_score))
        return finall

Ejemplo n.º 14

Mostrar archivo

def kwquery(query, answers):
    """Find which of ``answers`` web search supports best for ``query``.

    Stages, tried in order; the first one that yields something returns:

    1. Baidu result page (result ids 1-9). On the first result the direct
       answer cards are checked (knowledge graph, poetry, calendar,
       calculator, Zhidao best answer). On the first two results a Baidu
       Zhidao or Baidu Baike link is followed.
    2. Bing: the knowledge box, otherwise a "必应网典" entry.
    3. Fallback: the collected result text is split into sentences, the
       sentences containing a noun of the query are kept, and each
       candidate in ``answers`` is counted by how many of them mention it.

    Progress messages are printed to stdout. They used to be written as a
    bare ``print`` followed by a string on the next line, which does
    nothing in Python 3.

    :param query: the question text.
    :param answers: candidate answer strings used by the fallback stage.
    :return: list of answer strings: one extracted answer from stage 1/2,
        or up to three candidates from stage 3 (possibly empty).
    """

    def zhidao_answer_node(url):
        # Answer element of a Baidu Zhidao page, or None if it is missing.
        zhidao_soup = To.get_html_zhidao(url)
        if zhidao_soup is None:
            return None
        block = zhidao_soup.find(class_='bd answer')
        if block is None:
            return None
        node = block.find('pre')
        if node is None:
            node = block.find(class_='line content')
        return node

    # Keywords: only the nouns of the query are kept.
    keywords = [k.word for k in T.postag(query) if "n" in k.flag]

    answer = []
    text = ''

    # --- Stage 1: Baidu result summaries --------------------------------
    soup_baidu = To.get_html_baidu('https://www.baidu.com/s?wd=' +
                                   parse.quote(query))

    for i in range(1, 10):
        if soup_baidu is None:
            break
        results = soup_baidu.find(id=i)
        if results is None:
            print("百度摘要找不到答案")
            break

        attrs = results.attrs
        # The direct answer cards are only looked for on the first result.
        first_with_mu = i == 1 and 'mu' in attrs

        # Baidu knowledge graph
        if first_with_mu:
            r = results.find(class_='op_exactqa_s_answer')
            if r is None:
                print("百度知识图谱找不到答案")
            else:
                print("百度知识图谱找到答案")
                answer.append(r.get_text().strip())
                return answer

        # Classical poetry
        if first_with_mu:
            r = results.find(class_="op_exactqa_detail_s_answer")
            if r is None:
                print("百度诗词找不到答案")
            else:
                print("百度诗词找到答案")
                answer.append(r.get_text().strip())
                return answer

        # Perpetual calendar & dates
        if first_with_mu and 'http://open.baidu.com/calendar' in attrs['mu']:
            r = results.find(class_="op-calendar-content")
            if r is None:
                print("百度万年历找不到答案")
            else:
                print("百度万年历找到答案")
                answer.append(r.get_text().strip().replace("\n", "").replace(
                    " ", ""))
                return answer

        # Perpetual calendar, new template
        if 'tpl' in attrs and i == 1 and 'calendar_new' in attrs['tpl']:
            # .get() so a missing 'fk' attribute reaches the "not found"
            # branch instead of raising KeyError.
            r = attrs.get('fk')
            if r is None:
                print("百度万年历新版找不到答案")
            else:
                r = r.replace("6018_", "")
                print(r)
                print("百度万年历新版找到答案")
                answer.append(r)
                return answer

        # Calculator
        if first_with_mu and (
                'http://open.baidu.com/static/calculator/calculator.html'
                in attrs['mu']):
            r = results.find(class_="op_new_val_screen_result")
            if r is None:
                print("计算器找不到答案")
            else:
                print("计算器找到答案")
                answer.append(r.get_text().strip())
                return answer

        # Baidu Zhidao best answer
        if first_with_mu:
            best_link = results.find(class_='op_best_answer_question_link')
            url = best_link.get('href') if best_link is not None else None
            if url is None:
                print("百度知道图谱找不到答案")
            else:
                print("百度知道图谱找到答案")
                node = zhidao_answer_node(url)
                # A page without an answer element falls through to the
                # remaining checks instead of raising AttributeError.
                if node is not None:
                    answer.append(node.get_text())
                    return answer

        # Titles of the first two results: follow Zhidao / Baike links.
        h3 = results.find("h3")
        h3_link = h3.find("a") if h3 is not None else None
        if h3_link is not None and i in (1, 2):
            title = h3_link.get_text()
            url = h3_link.get('href')

            if u"百度知道" in title:
                if url is None:
                    print("百度知道图谱找不到答案")
                    continue
                print("百度知道图谱找到答案")
                node = zhidao_answer_node(url)
                if node is None:
                    continue
                answer.append(node.get_text().strip())
                return answer

            if u"百度百科" in title:
                if url is None:
                    print("百度百科找不到答案")
                    continue
                print("百度百科找到答案")
                baike_soup = To.get_html_baike(url)
                summary = (baike_soup.find(class_='lemma-summary')
                           if baike_soup is not None else None)
                if summary is None:
                    continue
                answer.append(summary.get_text().replace("\n", "").strip())
                return answer

        text += results.get_text()

    # --- Stage 2: Bing ----------------------------------------------------
    soup_bing = To.get_html_bing('https://www.bing.com/search?q=' +
                                 parse.quote(query))
    # Bing knowledge box
    bingbaike = (soup_bing.find(class_="bm_box")
                 if soup_bing is not None else None)

    if bingbaike is not None:
        v_lists = bingbaike.find_all(class_="b_vList")
        # Length check first: indexing [1] raised IndexError when the box
        # held fewer than two lists.
        if len(v_lists) > 1 and v_lists[1].find("li") is not None:
            print("Bing知识图谱找到答案")
            answer.append(bingbaike.get_text())
            return answer
    else:
        print("Bing知识图谱找不到答案")
        results = (soup_bing.find(id="b_results")
                   if soup_bing is not None else None)
        if results is not None:
            for bl in results.find_all('li'):
                if u" - 必应网典" not in bl.get_text():
                    continue
                print("查找Bing网典")
                h2 = bl.find("h2")
                link = h2.find("a") if h2 is not None else None
                url = link.get('href') if link is not None else None
                if url is None:
                    print("Bing网典找不到答案")
                    continue
                print("Bing网典找到答案")
                bingwd_soup = To.get_html_bingwd(url)
                card = (bingwd_soup.find(class_='bk_card_desc')
                        if bingwd_soup is not None else None)
                r = card.find("p") if card is not None else None
                if r is None:
                    continue
                answer.append(r.get_text().replace("\n", "").strip())
                return answer

            text += results.get_text()

    # --- Stage 3: neither engine had a direct answer, analyse summaries --
    # Split into sentences. As before, a trailing fragment that is not
    # followed by a delimiter is not kept.
    cutlist = [u"。", u"?", u".", u"_", u"-", u":", u"！", u"？"]
    sentences = []
    temp = ''
    for ch in text:
        if ch in cutlist:
            if temp != '':
                sentences.append(temp)
                temp = ''
        else:
            temp += ch

    # Keep only the (distinct) sentences that mention a keyword.
    key_sentences = {}
    for s in sentences:
        if any(k in s for k in keywords):
            key_sentences[s] = 1

    # Count, per candidate answer, the key sentences mentioning it.
    target_list = {}
    for Hans in answers:
        for sentence in key_sentences:
            if Hans in sentence:
                target_list[Hans] = target_list.get(Hans, 0) + 1
    print(target_list)

    # Most frequently mentioned candidates first.
    sorted_lists = sorted(target_list.items(),
                          key=lambda item: item[1],
                          reverse=True)
    # Drop question keywords that are not themselves candidates.
    sorted_lists2 = [
        st for st in sorted_lists
        if not (st[0] in keywords and st[0] not in answers)
    ]

    print("返回前n个词频")
    return [st[0] for st in sorted_lists2[:3]]

Ejemplo n.º 15

Mostrar archivo

    def respond(self, query):
        """
        Answer a broad question by summarising pages found through Baidu.

        The Baidu result page for ``query`` is fetched. For result ids 1-9
        every ``h3`` link pointing at ``www.baidu.com/link`` is followed,
        and the sentences returned by ``self._extract`` for that page are
        collected unless they contain a question mark. The distinct
        sentences, in first-seen order, are passed to
        ``self._get_key_sentence`` together with the query's nouns that are
        not in ``self.stop_word``.

        :param query: the question text.
        :return: the result of ``self._get_key_sentence``, or ``[]`` when
            no sentence could be collected.
        """
        # Search Baidu.
        url = 'https://www.baidu.com/s?wd=' + quote(query)
        soup_baidu = Html_Tools.get_html_baidu(url)
        # postag yields (word, tag) pairs; keep non-stop-word nouns.
        key_word = [
            word for word, tag in TextProcess.postag(query)
            if (word not in self.stop_word and "n" in tag)
        ]
        if soup_baidu is None:
            return []

        contents = []
        for i in range(1, 10):
            results = soup_baidu.find(id=i)
            if results is None:
                continue

            for heading in results.find_all('h3'):
                link = heading.find("a")
                # .get() so a link without href is skipped, not a KeyError.
                href = link.get('href') if link is not None else None
                if href is None or "www.baidu.com/link" not in href:
                    continue
                try:
                    sub_soup = Html_Tools.get_html(href)
                    # Sentence-level filtering: drop questions.
                    for sentence in self._extract(sub_soup):
                        if "?" in sentence or "？" in sentence:
                            continue
                        contents.append(sentence)
                except Exception:
                    # Best effort: a page that cannot be fetched or parsed
                    # is skipped. Unlike the former bare except, this lets
                    # KeyboardInterrupt and SystemExit through.
                    continue

        if not contents:
            return []
        # De-duplicate while keeping first-seen order, so the input to
        # _get_key_sentence is reproducible (a set gave arbitrary order).
        return self._get_key_sentence(list(dict.fromkeys(contents)), key_word)

Ejemplo n.º 16

Mostrar archivo

def kwquery(query):
    """Look up a direct answer to ``query`` on Baidu, then on Bing.

    Baidu result ids 1-9 are scanned in order and the first hit returns:
    knowledge-graph answer, movie column, weather, classical poetry,
    calendar (first result only), calculator, or the summary of a linked
    Baidu Baike entry. If Baidu yields nothing, the Bing knowledge box and
    then a "必应网典" entry are tried.

    The Baidu Zhidao lookups and the new-style calendar card of earlier
    revisions stay disabled. The query's noun keywords and the accumulated
    result text were computed but never used, so they are no longer
    computed.

    :param query: the question text.
    :return: list of answer strings; empty when nothing was found.
    """

    def weather_line(node, date_class):
        # One tab-separated line: date, temperature, weather, wind. A
        # missing field becomes '' instead of raising AttributeError.
        fields = []
        for css in (date_class, "op_weather4_twoicon_temp",
                    "op_weather4_twoicon_weath", "op_weather4_twoicon_wind"):
            cell = node.find(class_=css)
            fields.append(cell.get_text().strip() if cell is not None else '')
        return "{}\t{}\t{}\t{}".format(*fields)

    answer = []

    # --- Baidu result summaries ------------------------------------------
    url = 'https://www.baidu.com/s?wd=' + quote(query)
    soup_baidu = To.get_html_baidu(url)
    for i in range(1, 10):
        if soup_baidu is None:
            break

        results = soup_baidu.find(id=i)
        if results is None:
            print("Id{}找不到".format(i))
            continue

        has_mu = 'mu' in results.attrs
        mu = results.attrs.get('mu', '')

        # Baidu knowledge graph (usually within the first three results).
        if has_mu:
            r = results.find(class_='op_exactqa_s_answer')
            if r is not None:
                answer.append(r.get_text().strip().replace("  ", ""))
                return answer

        # Movie column
        if "http://nourl.baidu.com/" in mu:
            items = results.find_all(class_="c-gap-top-small")
            if items:
                for item in items:
                    item_link = item.find("a")
                    if item_link is not None:
                        answer.append(item_link.get_text())
                return answer

        # Weather: today's entry first, then the following days.
        today = results.find_all(
            class_="op_weather4_twoicon_today OP_LOG_LINK")
        if today:
            answer.append(weather_line(today[0], "op_weather4_twoicon_date"))
            for day in results.find_all(
                    class_="op_weather4_twoicon_day OP_LOG_LINK"):
                answer.append(
                    weather_line(day, "op_weather4_twoicon_date_day"))
            return answer

        # Classical poetry
        if has_mu:
            r = results.find(class_="op_exactqa_detail_s_answer")
            if r is not None:
                answer.append(r.get_text().strip())
                return answer

        # Perpetual calendar & dates (first result only)
        if i == 1 and 'http://open.baidu.com/calendar' in mu:
            r = results.find(class_="op-calendar-content")
            if r is not None:
                answer.append(r.get_text().strip().replace("\n", "").replace(
                    " ", ""))
                return answer

        # Calculator
        if 'http://open.baidu.com/static/calculator/calculator.html' in mu:
            r = results.find(class_="op_new_val_screen_result")
            if r is not None:
                answer.append(r.get_text().strip())
                return answer

        # Baidu Baike: follow the title link and take the entry summary.
        h3 = results.find("h3")
        link = h3.find("a") if h3 is not None else None
        if link is not None and "百度百科" in link.get_text():
            # .get() so a link without href is skipped, not a KeyError.
            baike_url = link.get('href')
            if baike_url is None:
                continue
            baike_soup = To.get_html_baike(baike_url)
            summary = (baike_soup.find(class_='lemma-summary')
                       if baike_soup is not None else None)
            if summary is None:
                continue
            answer.append(summary.get_text().replace("\n", "").strip())
            return answer

    # --- Bing ---------------------------------------------------------------
    soup_bing = To.get_html_bing('https://www.bing.com/search?q=' +
                                 quote(query))
    # Bing knowledge box
    bingbaike = (soup_bing.find(class_="bm_box")
                 if soup_bing is not None else None)

    if bingbaike is not None:
        v_lists = bingbaike.find_all(class_="b_vList")
        # Length check first: indexing [1] raised IndexError when the box
        # held fewer than two lists.
        if len(v_lists) > 1 and v_lists[1].find("li") is not None:
            answer.append(bingbaike.get_text())
            return answer
    else:
        results = (soup_bing.find(id="b_results")
                   if soup_bing is not None else None)
        bing_list = results.find_all('li') if results is not None else []
        for bl in bing_list:
            if " - 必应网典" not in bl.get_text():
                continue
            print("查找Bing网典")
            h2 = bl.find("h2")
            link = h2.find("a") if h2 is not None else None
            wd_url = link.get('href') if link is not None else None
            if wd_url is None:
                continue
            bingwd_soup = To.get_html_bingwd(wd_url)
            card = (bingwd_soup.find(class_='bk_card_desc')
                    if bingwd_soup is not None else None)
            r = card.find("p") if card is not None else None
            if r is None:
                continue
            answer.append(r.get_text().replace("\n", "").strip())
            return answer

    # Neither search engine offered a direct answer.
    return answer