def query(entity, attr):
    """Look up *attr* for *entity* in the Baidu Baike infobox.

    Returns the attribute value on success, otherwise ``attr + "::找不到"``
    ("not found").  Python 2 code (``dict.has_key`` / ``str.decode``).
    """
    # Fetch the Baike page for the entity and locate its infobox table.
    soup = To.get_html_baidu("http://baike.baidu.com/item/" + entity)
    basicInfo_block = soup.find(class_='basic-info cmn-clearfix')
    if basicInfo_block == None:
        # print 'info None'
        return attr + "::找不到"
    else:
        info = get_info(basicInfo_block)
        # for i in info:
        #     print i
        #     print info[i]
        #     print '-----------'
        if info.has_key(attr.decode('utf8')):
            # print 'has key'+attr.decode('utf8')
            return info[attr.decode('utf8')]
        else:
            # Attribute not present verbatim: map it through the synonym
            # dictionary and retry once.
            attr_list = T.load_baikeattr_name(
                os.path.dirname(os.path.split(os.path.realpath(__file__))[0])
                + '/resources/Attribute_name.txt')
            attr = T.load_synonyms_word_inattr(
                attr,
                os.path.dirname(os.path.split(os.path.realpath(__file__))[0])
                + '/resources/SynonDic.txt', attr_list)
            if info.has_key(attr.decode('utf8')):
                return info[attr.decode('utf8')]
            else:
                return attr + "::找不到"
def query(entity, attr):
    """Query the Baidu Baike infobox of *entity* for *attr*.

    Returns the attribute value when found.  Otherwise returns a log
    string ending in "-找不到" describing what could not be resolved.
    When the literal attribute name misses, it is mapped through the
    synonym dictionary and the lookup is retried once.
    """
    entity_uri = 'http://baike.baidu.com/item/' + entity
    result = '查询百科列表实体:' + entity_uri + '\n'

    page = To.get_html_baidu(entity_uri)
    infobox = page.find(class_='basic-info cmn-clearfix')
    if infobox == None:
        # The page carries no infobox at all.
        return result + entity + "-找不到\n"

    info = get_info(infobox)
    if attr in info:
        return info[attr]

    # Literal attribute missing: translate it via the synonym dictionary.
    result += '属性' + attr + '-找不到\n'
    resource_root = os.path.dirname(
        os.path.split(os.path.realpath(__file__))[0])
    known_attrs = T.load_baikeattr_name(
        resource_root + '/resources/Attribute_name.txt')
    attr = T.load_synonyms_word_inattr(
        attr, resource_root + '/resources/SynonDic.txt', known_attrs)
    if attr in info:
        return info[attr]
    return result + '同义属性' + attr + '-找不到\n'
def _get_key_sentence(self, contents, query_cut): """ 获得关键语句作为答案 :param contents: 句子集合 :param query_cut: 问句提取关键词 :return: """ # 一个句子内有更高的高频词,说明句子的重要性更棒棒 split_result = [] # 分词结果 TF = {} IDF = {} TF_IDF = {} for s in contents: word_list = TextProcess.cut(s) word_list = list( set([word for word in word_list if word not in self.stop_word])) split_result.append(word_list) for word in word_list: TF[word] = TF.get(word, 0) + 1 for word in set(word_list): IDF[word] = IDF.get(word, 0) + 1 # 含该词的句子数,而不是出现的次数 for k in TF: TF[k] = TF[k] / len(TF) IDF[k] = math.log(len(contents) / IDF[k]) TF_IDF[k] = TF[k] * IDF[k] topic_word = sorted(TF_IDF, key=lambda k: TF_IDF[k], reverse=True) topic_word = topic_word[:self.topic] # print("Query:", query_cut) # print("Topic:", topic_word) # 得分 词的重要性是(用tf或tf-idf衡量)/句子长度 score = [] for i, word_list in enumerate(split_result): s = 0. if len(word_list) <= 1 or (len(word_list) == 2 and word_list[1] == " "): # 只有一个词或者一个词加空格不太可能是答案 continue # print("sentence:{}\nwortcut:{}".format(contents[i], word_list)) for word in word_list: w = 0 if word in query_cut: # print("Word {} in query".format(word)) w += 0.5 if word in topic_word: # print("Word {} in topic".format(word)) w += 0.5 s += TF_IDF[word] * w # s = s / len(word_list) score.append((i, s)) # print("Score:{:.5f}".format(s)) # print("-------------------------------------") score = sorted(score, key=lambda x: x[1], reverse=True) result = [] if len(score) > self.n: score = score[:self.n] for pair in score: result.append(contents[pair[0]]) return result
def _get_key_sentence(self, contents):
    """Pick the key sentences out of *contents* as the answer.

    Sentences are scored by the summed TF-IDF of their words, then
    weighted by the share of pure-English tokens (answers here are mostly
    code and commands); sentences with no English token are dropped.
    Returns up to ``self.n`` sentences, best first.

    :param contents: candidate sentences
    :return: list of the highest-scoring sentences
    """
    split_result = []  # per-sentence word lists (stop words removed)
    TF = {}
    IDF = {}
    for s in contents:
        word_list = TextProcess.cut(s)
        word_list = [
            word for word in word_list if word not in self.stop_word
        ]
        split_result.append(word_list)
        for word in word_list:
            TF[word] = TF.get(word, 0) + 1
        for word in set(word_list):
            # Number of sentences containing the word, not occurrence count.
            IDF[word] = IDF.get(word, 0) + 1
    for k in TF:
        # NOTE(review): TF is normalised by vocabulary size, not token count.
        TF[k] = TF[k] / len(TF)
        IDF[k] = math.log(len(contents) / IDF[k])
    score = []
    for i, word_list in enumerate(split_result):
        s = 0.
        if len(word_list) <= 1 or (len(word_list) == 2
                                   and word_list[1] == " "):
            # A lone word (or word plus a space) is unlikely to be an answer.
            continue
        # Count pure-English tokens: answers are mostly code/commands, so a
        # higher English share raises the sentence's weight.
        alpha_num = 0.
        for word in word_list:
            if self._judge_pure_english(word):
                alpha_num += 1
            s += TF[word] * IDF[word]
        if alpha_num == 0:
            s = 0  # no English token at all — certainly not wanted
        else:
            s = s / len(word_list)
            ratio = alpha_num / len(word_list)  # hoisted common subexpression
            if ratio > 0.5:
                s = s * (1. + ratio)
            else:
                s = s * ratio
        score.append((i, s))
    score = sorted(score, key=lambda x: x[1], reverse=True)
    result = []
    if len(score) > self.n:
        score = score[:self.n]
    for pair in score:
        # FIX: removed leftover debug print of each selected sentence.
        result.append(contents[pair[0]])
    return result
def _similarity(self, t1, t2):
    """Embedding-based similarity of two texts, computed locally because
    the Baidu similarity API is rate-limited (QPS).

    :param t1: first text
    :param t2: second text
    :return: ``(1, score)`` with the cosine score mapped into [0, 1]
    """
    def tokenize(text):
        # Cut the text and drop stop words before embedding.
        return [w for w in TextProcess.cut(text) if w not in self.stop_word]

    vec_a = self.sentence_emb(tokenize(t1))
    vec_b = self.sentence_emb(tokenize(t2))
    raw = self.cos(vec_a, vec_b)
    # score = self.vector_similarity(t1_list, t2_list)
    # Map cosine from [-1, 1] into [0, 1].
    normalised = raw * 0.5 + 0.5
    return 1, normalised
def qa(question):
    """Answer *question*: AIML template match first, then Baike infobox
    lookup, then generic Baidu/Bing summary search.

    Results are printed (and logged); nothing is returned.
    """
    # Initialise the jieba tokenizer.
    T.jieba_initialize()
    # Corpus working directory (the chdir is disabled).
    mybot_path = './'
    # os.chdir(mybot_path)
    # Load the AIML rule files relative to this module.
    mybot = aiml.Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/OrdinaryQuestion.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/Common conversation.aiml")
    # mybot.respond('Load Doc Snake')
    # (Baike attribute list is loaded elsewhere.)
    input_message = question
    # Reject over-long (>60 chars) and empty questions.
    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
    elif input_message.strip() == '':
        print(mybot.respond("无"))
    print(input_message)
    message = T.wordSegment(input_message)  # strips punctuation
    print('word Seg:' + message)
    print('词性:')
    words = T.postag(input_message)
    if message == 'q':
        exit()
    else:
        # AIML template matching first.
        response = mybot.respond(message)
        print("=======")
        print(response)
        print("=======")
        if response == "":
            ans = mybot.respond('找不到答案')
            # print(robot_id + ":" + ans)
            print("{0}:{1}".format(robot_id, ans))
        # A '#'-prefixed response is a directive, not a literal answer.
        elif response[0] == '#':
            # Baike entity/attribute lookup.
            if response.__contains__("searchbaike"):
                print("search from baike")
                print(response)
                res = response.split(':')
                # entity
                entity = str(res[1]).replace(" ", "")
                # attribute
                attr = str(res[2]).replace(" ", "")
                print(entity + '<---->' + attr)
                ans = baike.query(entity, attr)
                # A list result means the infobox lookup hit an answer.
                if type(ans) == list:
                    print("{0}:{1}".format(robot_id, QAT.ptranswer(ans, False)))
                elif ans.decode('utf-8').__contains__(u'::找不到'):
                    # Fall back to Baidu + Bing summary search.
                    print("通用搜索")
                    # NOTE(review): `log` here vs `logs` below — confirm
                    # which logger object is actually intended.
                    log.info("通用搜索")
                    ans = search_summary.kwquery(input_message)
            # No template matched: generic query.
            elif response.__contains__("NoMatchingTemplate"):
                print("NoMatchingTemplate")
                ans = search_summary.kwquery(input_message)
            # NOTE(review): `ans` may be unbound here if neither directive
            # branch above matched — confirm every '#' response is covered.
            if len(ans) == 0:
                ans = mybot.respond('找不到答案')
                logs.info("{0}:{1}".format(robot_id, ans))
            elif len(ans) > 1:
                logs.info(sys.exc_info())
                logs.info("不确定候选答案")
                logs.info("[{0}][func:{1}][line:{2}]:不确定候选答案".format(
                    sys._getframe().f_code.co_filename,
                    sys._getframe().f_code.co_name,
                    sys._getframe().f_lineno))
                print(robot_id + ': ')
                for a in ans:
                    print(a)
                    # print(a.encode("utf8"))
            else:
                print('{0}:{1}'.format(robot_id, ans[0]))
        # Plain template match: echo the template's answer.
        else:
            print("{}: {}".format(robot_id, response))
def main():
    """Interactive console loop for the AIML chatbot (Python 2:
    ``raw_input`` and byte-string ``.decode``)."""
    # Initialise the jieba tokenizer.
    T.jieba_initialize()
    # Switch to the corpus working directory.
    mybot_path = './'
    os.chdir(mybot_path)
    # Load the AIML rule files.
    mybot = aiml.Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/abc.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/bot_profile.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/general.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/infor.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/main.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/new07281.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/salutations.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/virus0727.aiml")
    # (Several more .aiml loads — zextra_weibao, bye, tools, bad, funny,
    # OrdinaryQuestion, Common conversation — are disabled here, as is
    # `mybot.respond('Load Doc Snake')` which loaded the Baike attribute
    # list, and a decorative ASCII-art startup banner.  See VCS history.)
    print('泰康小康:你好,我是小康。╭(╯^╰)╮')
    # Question-handling loop.
    while True:
        input_message = raw_input("您想问什么 >> ")  # Python 2 only
        # Reject over-long (>60 chars) and empty questions.
        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
            continue
        elif input_message.strip() == '':
            print(mybot.respond("无"))
            continue
        # Pre-process the question with the Tools helpers.
        print(input_message)
        message = T.wordSegment(input_message)  # strips punctuation
        print('word Seg:' + message)
        # print('词性:')
        words = T.postag(input_message)
        # 'q' quits the loop.
        if message == 'q':
            exit()
        else:
            # AIML template matching comes first.
            response = mybot.respond(message)
            print("=======")
            print(response)
            # Python 2: response is a UTF-8 byte string.
            print(len(response.decode('utf8')))
            print("=======")
def kwquery(query):
    """Keyword search for *query* over Baidu, with Bing as fallback.

    Extracts noun keywords, scrapes the first 10 Baidu result snippets
    (knowledge graph, poetry, calendar, calculator, Zhidao, Baike), then
    tries Bing's knowledge box / Bing Wangdian.

    :return: ``(answer, log)`` — *answer* is a list of answer strings, or
        the raw snippet texts when nothing matched; *log* is a readable
        trace of every step taken.
    """
    # Tokenize, drop stop words, keep nouns as keywords.
    log = '提取关键词:'
    keywords = []
    words = T.postag(query)
    for k in words:
        # Nouns only.
        if k.flag.__contains__("n"):
            keywords.append(k.word)
            log += k.word
    log += '#' * 50 + '\n'
    answer = []
    text = []
    # flag becomes 1 as soon as an answer is found.
    flag = 0
    # Scrape the first 10 Baidu result snippets.
    log += '百度前10条的摘要'
    url = 'https://www.baidu.com/s?wd=' + quote(query)
    log += url + '#' * 50 + '\n'
    soup_baidu = To.get_html_baidu(url)
    for i in range(1, 11):
        if soup_baidu == None:
            break
        results = soup_baidu.find(id=i)
        if results == None:
            log += '百度摘要找不到答案' + '#' * 50 + '\n'
            break
        log += '第' + str(i) + '条摘要:\n'
        log += clean_str(results.get_text()) + '#' * 50 + '\n'
        # A 'mu' attribute marks a knowledge-graph snippet: direct hit.
        # if 'mu' in results.attrs and i == 1:
        if 'mu' in results.attrs and results.find(
                class_='op_exactqa_s_answer') != None:
            r = results.find(class_='op_exactqa_s_answer')
            log += '第一条百度摘要为百度搜索根据知识图谱直接匹配出的内容,优先查找\n'
            log += '百度知识图谱找到答案' + '#' * 50 + '\n'
            answer.append(r.get_text().strip())
            flag = 1
            break
        # Classical-poetry box.
        if 'mu' in results.attrs and i == 1 and results.find(
                class_="op_exactqa_detail_s_answer") != None:
            r = results.find(class_="op_exactqa_detail_s_answer")
            log += '百度诗词找到答案' + '#' * 50 + '\n'
            answer.append(r.get_text().strip())
            flag = 1
            break
        # Calendar & date boxes.
        if 'mu' in results.attrs and i == 1 and results.attrs[
                'mu'].__contains__('http://open.baidu.com/calendar'):
            r = results.find(class_="op-calendar-content")
            if r != None:
                log += '百度万年历找到答案' + '#' * 50 + '\n'
                answer.append(r.get_text().strip().replace("\n", "").replace(
                    " ", ""))
                flag = 1
                break
        if 'tpl' in results.attrs and i == 1 and results.attrs[
                'tpl'].__contains__('calendar_new'):
            r = results.attrs['fk'].replace("6018_", "")
            if r != None:
                log += '百度万年历新版找到答案' + '#' * 50 + '\n'
                answer.append(r)
                flag = 1
                break
        # Calculator box.
        if 'mu' in results.attrs and i == 1 and results.attrs[
                'mu'].__contains__(
                    'http://open.baidu.com/static/calculator/calculator.html'):
            r = results.find(class_="op_new_val_screen_result")
            if r != None:
                log += '计算器找到答案' + '#' * 50 + '\n'
                answer.append(r.get_text().strip())
                flag = 1
                break
        # Baidu Zhidao "best answer" box.
        # if 'mu' in results.attrs and i == 1:
        if 'mu' in results.attrs and results.find(
                class_='op_best_answer_question_link') != None:
            r = results.find(class_='op_best_answer_question_link')
            url = r['href']
            zhidao_soup = To.get_html_zhidao(url)
            r = zhidao_soup.find(class_='bd answer').find('pre')
            if r == None:
                r = zhidao_soup.find(class_='bd answer').find(
                    class_='line content')
            log += '百度知道best answer找到答案' + '#' * 50 + '\n'
            answer.append(r.get_text())
            flag = 1
            break
        if results.find("h3") != None:
            # Ordinary result linking into Baidu Zhidao.
            if results.find("h3").find("a").get_text().__contains__(u"百度知道"):
                url = results.find("h3").find("a")['href']
                if url == None:
                    log += '百度知道找不到答案' + '#' * 50 + '\n'
                    continue
                else:
                    log += '百度知道找到答案' + '#' * 50 + '\n'
                    zhidao_soup = To.get_html_zhidao(url)
                    r = zhidao_soup.find(class_='bd answer')
                    if r == None:
                        continue
                    else:
                        r = r.find('pre')
                        if r == None:
                            r = zhidao_soup.find(class_='bd answer').find(
                                class_='line content')
                        answer.append(r.get_text().strip())
                    flag = 1
                    break
            # Ordinary result linking into Baidu Baike.
            if results.find("h3").find("a").get_text().__contains__(u"百度百科"):
                url = results.find("h3").find("a")['href']
                if url == None:
                    log += '百度百科找不到答案' + '#' * 50 + '\n'
                    continue
                else:
                    log += '百度百科找到答案' + '#' * 50 + '\n'
                    baike_soup = To.get_html_baike(url)
                    r = baike_soup.find(class_='lemma-summary')
                    if r == None:
                        continue
                    else:
                        r = r.get_text().replace("\n", "").strip()
                        answer.append(r)
                    flag = 1
                    break
        # Keep the snippet text (with its link) as a last-resort answer.
        text.append(
            clean_str(results.get_text()) + "(" + results.find("a")['href'] +
            ")")
    if flag == 1:
        return (answer, log)
    log += '百度前十条摘要不包含百度知道、百度百科内容,或相关内容中找不到答案' + '#' * 50 + '\n'
    # Fall back to Bing.
    log += '通过Bing查找\n'
    url = 'https://www.bing.com/search?q=' + quote(query)
    log += url + '\n'
    log += '#' * 50
    soup_bing = To.get_html_bing(url)
    # Is the answer in Bing's knowledge box?
    # bingbaike = soup_bing.find(class_="b_xlText b_emphText")
    bingbaike = soup_bing.find(class_="bm_box")
    if bingbaike != None:
        if bingbaike.find_all(class_="b_vList")[1] != None:
            if bingbaike.find_all(class_="b_vList")[1].find("li") != None:
                log += 'Bing百科找到答案' + '#' * 50 + '\n'
                flag = 1
                answer.append(bingbaike.get_text())
                return (answer, log)
    else:
        log += 'Bing百科找不到答案' + '#' * 50 + '\n'
        results = soup_bing.find(id="b_results")
        bing_list = results.find_all('li')
        for bl in bing_list:
            temp = bl.get_text()
            # Bing Wangdian (必应网典) results.
            if temp.__contains__(u" - 必应网典"):
                url = bl.find("h2").find("a")['href']
                if url == None:
                    log += 'Bing网典找不到答案' + '#' * 50 + '\n'
                    continue
                else:
                    log += 'Bing网典找到答案' + '#' * 50 + '\n'
                    bingwd_soup = To.get_html_bingwd(url)
                    r = bingwd_soup.find(class_='bk_card_desc').find("p")
                    if r == None:
                        continue
                    else:
                        r = r.get_text().replace("\n", "").strip()
                        answer.append(r)
                        flag = 1
                        break
        if flag == 1:
            return (answer, log)
    log += '没有找到答案,返回百度前十条摘要内容\n'
    # (A large commented-out block lived here: sentence splitting on
    # punctuation, keyword filtering of sentences, person-name ("nr" tag)
    # frequency ranking, and returning the top-3 candidates.  It is not on
    # the active path; see version history for the full text.)
    answer = text
    return (answer, log)
def main():
    """Screenshot-quiz answering loop.

    Each round: OCR the question from the current screen, rank the
    candidate answers by Baidu hit counts, then run the question through
    the chatbot pipeline (AIML template -> Baike infobox -> summary
    search).  Loops until the user enters ESC.
    """
    args = parse_args()
    timeout = args.timeout
    # Initialise the jieba tokenizer.
    T.jieba_initialize()
    # Switch to the corpus working directory.
    mybot_path = './'
    os.chdir(mybot_path)
    mybot = Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/OrdinaryQuestion.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/Common conversation.aiml")
    if enable_chrome:
        # Shared question string plus a daemon browser process that
        # searches whatever question is stored in it.
        question_obj = Value(ctypes.c_char_p, "".encode("utf-8"))
        browser_daemon = multiprocessing.Process(target=run_browser,
                                                 args=(question_obj, ))
        browser_daemon.daemon = True
        browser_daemon.start()

    def __inner_job():
        # One round: screenshot -> OCR -> count ranking -> chatbot.
        start = time.time()
        text_binary = analyze_current_screen_text(
            directory=data_directory, compress_level=image_compress_level[0])
        keywords = get_text_from_image(image_data=text_binary, )
        if not keywords:
            print("text not recognize")
            return
        true_flag, question, answers = parse_question_and_answer(keywords)
        # questions=question.decode('unicode-escape')
        # new_ans=[]
        # for ans in answers:
        #     new_ans.append(ans.decode('unicode-escape'))
        print('-' * 72)
        print(question)
        print('-' * 72)
        print("\n".join(answers))
        # Notify the helper browser of the new question.
        if enable_chrome:
            with question_obj.get_lock():
                question_obj.value = question
            keyboard.press("space")
        # Rank candidate answers by Baidu result counts.
        search_question = pre_process_question(question)
        summary = baidu_count(search_question, answers, timeout=timeout)
        summary_li = sorted(summary.items(),
                            key=operator.itemgetter(1),
                            reverse=True)
        data = [("选项", "同比")]
        for a, w in summary_li:
            data.append((a, w))
        table = AsciiTable(data)
        print(table.table)
        print("*" * 72)
        # Negatively phrased questions invert the recommendation.
        if true_flag:
            print("肯定回答(**): ", summary_li[0][0])
            print("否定回答( ): ", summary_li[-1][0])
        else:
            print("肯定回答( ): ", summary_li[0][0])
            print("否定回答(**): ", summary_li[-1][0])
        print("*" * 72)
        ##############################################################
        # Chatbot pipeline on the recognised question.
        input_message = question
        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
        elif input_message.strip() == '':
            print(mybot.respond("无"))
        # print(input_message)
        message = T.wordSegment(input_message)  # strips punctuation
        # print('word Seg:' + message)
        # print('词性:')
        words = T.postag(input_message)
        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)
            # print("=======")
            # print(response)
            # print("=======")
            if response == "":
                ans = mybot.respond('找不到答案')
                print('Eric:' + ans)
            # Baike lookup directives.
            elif response[0] == '#':
                # Entity/attribute template matched.
                if response.__contains__("searchbaike"):
                    # print("searchbaike")
                    # print(response)
                    res = response.split(':')
                    # entity
                    entity = str(res[1]).replace(" ", "")
                    # attribute
                    attr = str(res[2]).replace(" ", "")
                    # print(entity + '<---->' + attr)
                    ans = baike.query(entity, attr)
                    # A list result means the infobox hit an answer.
                    if type(ans) == list:
                        print('Eric:' + QAT.ptranswer(ans, False))
                    elif ans.decode('utf-8').__contains__(u'::找不到'):
                        # Baidu + Bing summary fallback.
                        print("通用搜索")
                        ans = search_summary.kwquery(input_message)
                # No template matched: generic query over the answers.
                elif response.__contains__("NoMatchingTemplate"):
                    # print("NoMatchingTemplate")
                    ans = search_summary.kwquery(input_message, answers)
                if len(ans) == 0:
                    print('Eric:' + '找不到答案')
                elif len(ans) > 1:
                    print("不确定候选答案")
                    print('Eric: ')
                    for a in ans:
                        print(a)
                else:
                    print('Eric:' + ans[0])
            # Plain template answer.
            else:
                print('Eric:' + response)
        end = time.time()
        print("use {0} 秒".format(end - start))
        save_screen(directory=data_directory)

    while True:
        print("""
    请在答题开始前就运行程序,
    答题开始的时候按Enter预测答案
        """)
        enter = input("按Enter键开始,按ESC键退出...")
        print(enter)
        if enter == chr(27):
            break
        try:
            __inner_job()
        except Exception as e:
            print(str(e))
    print("欢迎下次使用")
def __inner_job():
    """One quiz round: OCR the on-screen question, rank the candidate
    answers by Baidu hit counts, then run the question through the
    chatbot pipeline (AIML -> Baike -> summary search).

    Relies on module-level state: ``data_directory``,
    ``image_compress_level``, ``enable_chrome``/``question_obj``,
    ``timeout`` and ``mybot``.
    """
    start = time.time()
    text_binary = analyze_current_screen_text(
        directory=data_directory, compress_level=image_compress_level[0])
    keywords = get_text_from_image(image_data=text_binary, )
    if not keywords:
        print("text not recognize")
        return
    true_flag, question, answers = parse_question_and_answer(keywords)
    # questions=question.decode('unicode-escape')
    # new_ans=[]
    # for ans in answers:
    #     new_ans.append(ans.decode('unicode-escape'))
    print('-' * 72)
    print(question)
    print('-' * 72)
    print("\n".join(answers))
    # Notify the helper browser of the new question.
    if enable_chrome:
        with question_obj.get_lock():
            question_obj.value = question
        keyboard.press("space")
    # Rank candidate answers by Baidu result counts.
    search_question = pre_process_question(question)
    summary = baidu_count(search_question, answers, timeout=timeout)
    summary_li = sorted(summary.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
    data = [("选项", "同比")]
    for a, w in summary_li:
        data.append((a, w))
    table = AsciiTable(data)
    print(table.table)
    print("*" * 72)
    # Negatively phrased questions invert the recommendation.
    if true_flag:
        print("肯定回答(**): ", summary_li[0][0])
        print("否定回答( ): ", summary_li[-1][0])
    else:
        print("肯定回答( ): ", summary_li[0][0])
        print("否定回答(**): ", summary_li[-1][0])
    print("*" * 72)
    ##############################################################
    # Chatbot pipeline on the recognised question.
    input_message = question
    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
    elif input_message.strip() == '':
        print(mybot.respond("无"))
    # print(input_message)
    message = T.wordSegment(input_message)  # strips punctuation
    # print('word Seg:' + message)
    # print('词性:')
    words = T.postag(input_message)
    if message == 'q':
        exit()
    else:
        response = mybot.respond(message)
        # print("=======")
        # print(response)
        # print("=======")
        if response == "":
            ans = mybot.respond('找不到答案')
            print('Eric:' + ans)
        # Baike lookup directives.
        elif response[0] == '#':
            # Entity/attribute template matched.
            if response.__contains__("searchbaike"):
                # print("searchbaike")
                # print(response)
                res = response.split(':')
                # entity
                entity = str(res[1]).replace(" ", "")
                # attribute
                attr = str(res[2]).replace(" ", "")
                # print(entity + '<---->' + attr)
                ans = baike.query(entity, attr)
                # A list result means the infobox hit an answer.
                if type(ans) == list:
                    print('Eric:' + QAT.ptranswer(ans, False))
                elif ans.decode('utf-8').__contains__(u'::找不到'):
                    # Baidu + Bing summary fallback.
                    print("通用搜索")
                    ans = search_summary.kwquery(input_message)
            # No template matched: generic query over the answers.
            elif response.__contains__("NoMatchingTemplate"):
                # print("NoMatchingTemplate")
                ans = search_summary.kwquery(input_message, answers)
            if len(ans) == 0:
                print('Eric:' + '找不到答案')
            elif len(ans) > 1:
                print("不确定候选答案")
                print('Eric: ')
                for a in ans:
                    print(a)
            else:
                print('Eric:' + ans[0])
        # Plain template answer.
        else:
            print('Eric:' + response)
    end = time.time()
    print("use {0} 秒".format(end - start))
    save_screen(directory=data_directory)
import os    # FIX: used below (os.path...) but was never imported
import time

import aiml  # FIX: used below (aiml.Kernel) but was never imported

from Tools import TextProcess as T
from QuestionParser import aiml_parse
from AnswerGeneration import aiml_generate

# Module-level AIML kernel, loaded once with every rule file under
# resources/ next to this module.
mybot = aiml.Kernel()
mybot.learn(os.path.split(os.path.realpath(__file__))[0] +
            "/resources/std-startup.xml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] +
            "/resources/bye.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] +
            "/resources/tools.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] +
            "/resources/bad.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] +
            "/resources/funny.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] +
            "/resources/OrdinaryQuestion.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] +
            "/resources/Common conversation.aiml")
T.jieba_initialize()


def qa(question, T, mybot, QAT):
    """Parse *question* with the AIML rules and generate an answer.

    :param question: raw user question
    :param T: text-processing toolbox (shadows the module-level ``T``)
    :param mybot: an ``aiml.Kernel`` with the rules loaded
    :param QAT: answer-formatting helper passed through to generation
    :return: the generated answer
    """
    q_parsed = aiml_parse.aiml_question_parsing(question, T, mybot)
    ans = aiml_generate.aiml_answer_generate(q_parsed, mybot, QAT, question)
    return ans


def code_format(s):
    """Best-effort UTF-8 encode of *s*; return it unchanged if encoding
    is not possible (e.g. already bytes)."""
    try:
        s = s.encode('utf8')
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt.
        s = s
    return s
def find_ans(question=''):
    """Find an answer for *question*, trying sources in priority order:
    local knowledge base, AIML template / Baike infobox, then Baidu/Bing
    summary search.

    :param question: user question (rejected if empty or >60 chars)
    :return: ``(answer, log)`` — the answer text and a numbered trace of
        the sources consulted
    """
    # `raw` is a module-level re-entrancy guard for the "X是什么/什么是X"
    # rewrite recursion below.
    global raw
    log = '答案来源:'
    cnt = 0
    input_message = question
    if len(input_message) > 60:
        return (mybot.respond("句子长度过长"), log)
    elif input_message.strip() == '':
        return (mybot.respond("无"), log)
    # 1) Local knowledge base (code in the QA1 folder).
    cnt += 1
    log += str(cnt) + ':检索本地知识库\n'
    old_client = Client()
    ans, log_tmp = old_client.qa_find_ans(input_message)
    log += log_tmp
    if ans != "不知道~":
        return (clean_str(ans), log)
    log += '本地知识库找不到答案或答案不确定\n'
    # 2) AIML template matching.
    message = T.wordSegment(input_message)
    response = mybot.respond(message)
    # log += 'AIML模板返回内容:' + response + '\n'
    if response == "":
        ans = mybot.respond('不知道~')
        return (ans, log)
    elif response[0] == '#':
        # A '#'-prefixed response is a directive, not a literal answer.
        cnt += 1
        log += str(cnt) + ':匹配不到问句模板\n'
        if response.__contains__("searchbaike"):
            # Entity/attribute template: try the Baike infobox.
            res = response.split(':')
            entity = str(res[1]).replace(" ","")
            attr = str(res[2]).replace(" ","")
            cnt += 1
            log += str(cnt) + ':匹配到实体属性模板,' + '实体:' + entity + ' 属性:' + attr + '\n'
            ans = baike.query(entity, attr)
            if type(ans) == list:
                cnt += 1
                log += str(cnt) + ':来自百科Infobox\n'
                return (QAT.ptranswer(ans,False), log)
            elif '-找不到' in ans:
                # Infobox miss: fall through to summary search.
                cnt += 1
                log += str(cnt) + ':百科Infobox查询不到:' + ans + '\n'
                cnt += 1
                log += str(cnt) + '来自搜索\n'
                (ans, tmplog) = search_summary.kwquery(input_message)
                log += tmplog
        elif response.__contains__("NoMatchingTemplate"):
            # No entity/relation template matched: summary search.
            cnt += 1
            log += str(cnt) + ':匹配不到实体关系模板\n'
            cnt += 1
            log += str(cnt) + ':来自搜索\n'
            (ans,tmplog) = search_summary.kwquery(input_message)
            log += tmplog
        if len(ans) == 0:
            cnt += 1
            log += str(cnt) + ':未查询到答案\n'
            return (mybot.respond('不知道~'), log)
        elif len(ans) >1:
            # Multiple candidates: return snippets as "news", or for
            # definition questions recurse once with rewritten phrasing.
            cnt += 1
            log += str(cnt) + ':返回百度摘要\n'
            if raw == False and ('什么是' in question or '是什么' in question):
                result = "给你找到几篇新闻:"
                for a in ans:
                    result += a + '\n'
                return (result, log)
            else:
                # NOTE(review): sets the global guard, then retries both
                # "X是什么" and "什么是X" phrasings — confirm intended order.
                raw = False
                question = question.replace("是什么", "").replace("什么是", "")
                ans2, log2 = find_ans(question + "是什么")
                ans1, log1 = find_ans("什么是" + question)
                if "给你找到几篇新闻" not in ans1:
                    return (ans1, log1)
                else:
                    return (ans2, log2)
        else:
            return (clean_str(ans[0]), log)
    # 3) Direct template answer.
    else:
        cnt += 1
        log += str(cnt) + ':匹配问句模板\n'
        return (clean_str(response), log)
def respond(self, text):
    """Answer *text* from the local QA-pair database.

    Noun keywords (verbs as fallback) are extracted from the question,
    candidate rows containing every keyword are fetched, and candidates
    whose semantic similarity to *text* exceeds 0.9 are returned —
    including all ties for the best score.

    :param text: the user's question
    :return: list of ``(matched_question, answer)`` tuples; ``[]`` when
        no keyword or no close-enough match exists
    :raises Exception: when the similarity backend reports failure
    """
    question = list(TextProcess.postag(text))  # (word, tag) pairs
    keywords = []
    # print(question)
    logger.global_logger.info("query: {} cut:{}".format(text, question))
    for word, tag in question:
        # Drop stop words.
        if word in self.stop_words:
            continue
        if 'n' not in tag or "un" == tag:  # and 'v' not in tag:
            # Keep nouns only so keywords stay close to the described
            # object; semantic matching refines the result later.
            continue
        keywords.append(word)
    if len(keywords) == 0:
        # No noun at all: fall back to verbs.
        for word, tag in question:
            if word in self.stop_words:
                continue
            if 'v' not in tag:
                continue
            keywords.append(word)
    # FIX: build a parameterised query instead of interpolating keywords
    # into the SQL text (SQL injection + quoting bugs on user input).
    # instr() is faster here than LIKE '%…%'.
    condition = [" instr(QUESTION, ?) > 0 " for _ in keywords]
    if len(condition) == 0:
        return []
    sql = "select QUESTION ,ANSWER from qa_pair where {}".format(
        "and".join(condition))
    logger.global_logger.info("going to execute this sql: {}".format(sql))
    result = self.cursor.execute(sql, tuple(keywords))  # rows of (q, a)
    res = []
    # Score every candidate question against the query.
    for row in result:
        q = row[0]
        a = row[1]
        state, sim = self._similarity(text, q)
        # logger.global_logger.info("text:{} query:{} score:{}".format(text, q, sim))
        if state == 0:
            raise Exception("similarity Api Error.")
        elif sim > 0.9:
            res.append((q, a, sim))
    # Keep the best-scoring pair(s), including ties for first place.
    finall = []
    if len(res) > 0:
        ans = sorted(res, key=lambda x: x[2], reverse=True)
        score = -1
        for a in ans:
            if a[2] > score:
                score = a[2]
                logger.global_logger.info(
                    "[MATCH RESULT]{} match:{} score:{}".format(
                        text, a[0], score))
                finall.append((a[0], a[1]))
            elif a[2] == score:
                finall.append((a[0], a[1]))
    # else:
    #     finall.append(("", ""))
    return finall
def kwquery(query, answers):
    """Search Baidu (then Bing) for *query* and rank *answers* by how
    often each candidate appears in keyword-bearing snippet sentences.

    Python 2 code (print statements).

    :param query: the question text
    :param answers: candidate answer strings to rank
    :return: list of answer strings (direct hits, or the top-3 candidates
        by snippet frequency)
    """
    # Tokenize, drop stop words, keep nouns as keywords.
    keywords = []
    words = T.postag(query)
    for k in words:
        # Nouns only.
        if k.flag.__contains__("n"):
            # print k.flag
            # print k.word
            keywords.append(k.word)
    answer = []
    text = ''
    # flag becomes 1 once an answer is found.
    flag = 0
    # Scrape the first Baidu result snippets.
    soup_baidu = To.get_html_baidu('https://www.baidu.com/s?wd=' +
                                   parse.quote(query))
    for i in range(1, 10):
        if soup_baidu == None:
            break
        results = soup_baidu.find(id=i)
        if results == None:
            print "百度摘要找不到答案"
            break
        # print '============='
        # print results.attrs
        # print type(results.attrs)
        # print results['class']
        # A 'mu' attribute on the first result marks a knowledge-graph hit.
        if 'mu' in results.attrs and i == 1:
            # print results.attrs["mu"]
            r = results.find(class_='op_exactqa_s_answer')
            if r == None:
                print "百度知识图谱找不到答案"
            else:
                # print r.get_text()
                print "百度知识图谱找到答案"
                answer.append(r.get_text().strip())
                flag = 1
                break
        # Classical-poetry box.
        if 'mu' in results.attrs and i == 1:
            r = results.find(class_="op_exactqa_detail_s_answer")
            if r == None:
                print "百度诗词找不到答案"
            else:
                # print r.get_text()
                print "百度诗词找到答案"
                answer.append(r.get_text().strip())
                flag = 1
                break
        # Calendar & date boxes.
        if 'mu' in results.attrs and i == 1 and results.attrs[
                'mu'].__contains__('http://open.baidu.com/calendar'):
            r = results.find(class_="op-calendar-content")
            if r == None:
                print "百度万年历找不到答案"
            else:
                # print r.get_text()
                print "百度万年历找到答案"
                answer.append(r.get_text().strip().replace("\n", "").replace(
                    " ", ""))
                flag = 1
                break
        if 'tpl' in results.attrs and i == 1 and results.attrs[
                'tpl'].__contains__('calendar_new'):
            r = results.attrs['fk'].replace("6018_", "")
            print r
            if r == None:
                print "百度万年历新版找不到答案"
                # continue
            else:
                # print r.get_text()
                print "百度万年历新版找到答案"
                answer.append(r)
                flag = 1
                break
        # Calculator box.
        if 'mu' in results.attrs and i == 1 and results.attrs[
                'mu'].__contains__(
                    'http://open.baidu.com/static/calculator/calculator.html'):
            r = results.find(class_="op_new_val_screen_result")
            if r == None:
                print "计算器找不到答案"
                # continue
            else:
                # print r.get_text()
                print "计算器找到答案"
                answer.append(r.get_text().strip())
                flag = 1
                break
        # Baidu Zhidao best-answer box.
        if 'mu' in results.attrs and i == 1:
            r = results.find(class_='op_best_answer_question_link')
            if r == None:
                print "百度知道图谱找不到答案"
            else:
                print "百度知道图谱找到答案"
                url = r['href']
                zhidao_soup = To.get_html_zhidao(url)
                r = zhidao_soup.find(class_='bd answer').find('pre')
                if r == None:
                    r = zhidao_soup.find(class_='bd answer').find(
                        class_='line content')
                answer.append(r.get_text())
                flag = 1
                break
        if results.find("h3") != None:
            # Ordinary result linking into Baidu Zhidao (first two only).
            if results.find("h3").find("a").get_text().__contains__(
                    u"百度知道") and (i == 1 or i == 2):
                url = results.find("h3").find("a")['href']
                if url == None:
                    print "百度知道图谱找不到答案"
                    continue
                else:
                    print "百度知道图谱找到答案"
                    zhidao_soup = To.get_html_zhidao(url)
                    r = zhidao_soup.find(class_='bd answer')
                    if r == None:
                        continue
                    else:
                        r = r.find('pre')
                        if r == None:
                            r = zhidao_soup.find(class_='bd answer').find(
                                class_='line content')
                        answer.append(r.get_text().strip())
                    flag = 1
                    break
            # Ordinary result linking into Baidu Baike (first two only).
            if results.find("h3").find("a").get_text().__contains__(
                    u"百度百科") and (i == 1 or i == 2):
                url = results.find("h3").find("a")['href']
                if url == None:
                    print "百度百科找不到答案"
                    continue
                else:
                    print "百度百科找到答案"
                    baike_soup = To.get_html_baike(url)
                    r = baike_soup.find(class_='lemma-summary')
                    if r == None:
                        continue
                    else:
                        r = r.get_text().replace("\n", "").strip()
                        answer.append(r)
                    flag = 1
                    break
        # Accumulate raw snippet text for the frequency analysis below.
        text += results.get_text()
    if flag == 1:
        return answer
    # Fall back to Bing.
    soup_bing = To.get_html_bing('https://www.bing.com/search?q=' +
                                 parse.quote(query))
    # Is the answer in Bing's knowledge box?
    # bingbaike = soup_bing.find(class_="b_xlText b_emphText")
    bingbaike = soup_bing.find(class_="bm_box")
    if bingbaike != None:
        if bingbaike.find_all(class_="b_vList")[1] != None:
            if bingbaike.find_all(class_="b_vList")[1].find("li") != None:
                print "Bing知识图谱找到答案"
                flag = 1
                answer.append(bingbaike.get_text())
                # print "====="
                # print answer
                # print "====="
                return answer
    else:
        print "Bing知识图谱找不到答案"
        results = soup_bing.find(id="b_results")
        bing_list = results.find_all('li')
        for bl in bing_list:
            temp = bl.get_text()
            # Bing Wangdian (必应网典) results.
            if temp.__contains__(u" - 必应网典"):
                print "查找Bing网典"
                url = bl.find("h2").find("a")['href']
                if url == None:
                    print "Bing网典找不到答案"
                    continue
                else:
                    print "Bing网典找到答案"
                    bingwd_soup = To.get_html_bingwd(url)
                    r = bingwd_soup.find(class_='bk_card_desc').find("p")
                    if r == None:
                        continue
                    else:
                        r = r.get_text().replace("\n", "").strip()
                        answer.append(r)
                        flag = 1
                        break
        if flag == 1:
            return answer
        text += results.get_text()
    # print text
    # Neither knowledge graph answered: analyse the collected snippets.
    if flag == 0:
        # Split the snippet text into sentences on punctuation.
        cutlist = [u"。", u"?", u".", u"_", u"-", u":", u"!", u"?"]
        temp = ''
        sentences = []
        for i in range(0, len(text)):
            if text[i] in cutlist:
                if temp == '':
                    continue
                else:
                    # print temp
                    sentences.append(temp)
                    temp = ''
            else:
                temp += text[i]
        # Keep only sentences containing at least one keyword.
        key_sentences = {}
        for s in sentences:
            for k in keywords:
                if k in s:
                    key_sentences[s] = 1
        # Count how often each candidate answer appears in those sentences.
        target_list = {}
        for Hans in answers:
            for sentence in key_sentences:
                if Hans in sentence:
                    if Hans in target_list:
                        target_list[Hans] += 1
                    else:
                        target_list[Hans] = 1
        print(target_list)
        # Sort by frequency, highest first.
        ##print(target_list.items())
        sorted_lists = sorted(target_list.items(),
                              key=lambda item: item[1],
                              reverse=True)
        # print len(target_list)
        # Drop question keywords unless they are also candidate answers.
        sorted_lists2 = []  # candidate queue
        for i, st in enumerate(sorted_lists):
            # print st[0]
            if st[0] in keywords and st[0] not in answers:
                continue
            else:
                sorted_lists2.append(st)
        print "返回前n个词频"
        # Return the top-3 candidates by frequency.
        answer = []
        for i, st in enumerate(sorted_lists2):
            # print st[0]
            # print st[1]
            if i < 3:
                answer.append(st[0])
        # print answer
        return answer
def respond(self, query):
    """Answer a broad/open question by summarising Baidu search results.

    Fetches the first Baidu result page for *query*, follows each organic
    result link, extracts candidate sentences from the linked pages and
    returns the key sentences selected by ``self._get_key_sentence``.

    :param query: the user's question (plain text).
    :return: list of key sentences (possibly empty).
    """
    # Fetch the Baidu result page for the query.
    url = 'https://www.baidu.com/s?wd=' + quote(query)
    soup_baidu = Html_Tools.get_html_baidu(url)

    # Keep only the nouns of the question (minus stop words) as query
    # keywords; TextProcess.postag yields (word, tag) pairs.
    key_word = [
        word for word, tag in TextProcess.postag(query)
        if word not in self.stop_word and "n" in tag
    ]

    contents = []
    # Baidu numbers each organic result div with id=1..; scan the first 9.
    for result_id in range(1, 10):
        if soup_baidu is None:
            break
        results = soup_baidu.find(id=result_id)
        if results is None:
            continue
        for heading in results.find_all('h3'):
            anchor = heading.find("a")
            if anchor is None:
                continue
            href = anchor['href']
            # Only follow Baidu redirect links (real result URLs).
            if "www.baidu.com/link" not in href:
                continue
            try:
                sub_soup = Html_Tools.get_html(href)
                # Sentence-level filtering: drop interrogative sentences
                # (full- or half-width question mark) — they cannot be
                # answers.
                for sentence in self._extract(sub_soup):
                    if "?" in sentence or "?" in sentence:
                        continue
                    contents.append(sentence)
            except Exception:
                # Best effort: one unreachable/broken page must not abort
                # the whole query. (Was a bare `except:`, which also
                # swallowed KeyboardInterrupt/SystemExit.)
                pass

    if contents:
        key_sentence = self._get_key_sentence(list(set(contents)), key_word)
    else:
        key_sentence = []
    return key_sentence
def kwquery(query):
    """Answer *query* by scraping Baidu, with Bing as a fallback.

    Walks the first ~9 Baidu result blocks looking for "exact answer"
    cards (knowledge graph, movies, weather, classical poetry, calendar,
    calculator, Baike summary). If none hit, falls back to Bing's
    knowledge panel and Bing Wangdian (必应网典).

    :param query: the user's question (plain text).
    :return: list of answer strings (may be empty).
    """
    # Segment the question; keep only nouns as keywords.
    keywords = []
    words = T.postag(query)
    for k in words:
        # Keep nouns only (POS tag contains "n").
        if k.flag.__contains__("n"):
            keywords.append(k.word)

    answer = []
    text = ''
    # flag is set to 1 as soon as an answer has been found.
    flag = 0

    # Fetch the first Baidu result page (top ~10 snippets).
    url = 'https://www.baidu.com/s?wd=' + quote(query)
    soup_baidu = To.get_html_baidu(url)
    for i in range(1, 10):
        if soup_baidu == None:
            break
        results = soup_baidu.find(id=i)
        if results == None:
            print("Id{}找不到".format(i))
            continue

        # Accumulate snippet text (no longer consumed downstream; kept
        # from the legacy summary-analysis path).
        text += results.get_text()

        # A 'mu' attribute marks a Baidu knowledge-graph card; the exact
        # answer is usually within the first three results.
        if 'mu' in results.attrs:
            r = results.find(class_='op_exactqa_s_answer')
            if r == None:
                pass
            else:
                answer.append(r.get_text().strip().replace(" ", ""))
                flag = 1
                break

        # Movie card (mu points at nourl.baidu.com).
        if 'mu' in results.attrs and results.attrs['mu'].__contains__(
                "http://nourl.baidu.com/"):
            if results.find(class_="c-gap-top-small") is not None:
                items = results.find_all(class_="c-gap-top-small")
                for item in items:
                    if item.find("a") is not None:
                        answer.append(item.find("a").get_text())
                        flag = 1
                # NOTE(review): this break exits the outer results loop
                # after collecting every movie entry — confirm intended.
                break
            else:
                pass

        # Weather card: today's forecast first...
        weather_list = results.find_all(
            class_="op_weather4_twoicon_today OP_LOG_LINK")
        if len(weather_list) > 0:
            weather_info = weather_list[0]
            date = weather_info.find(
                class_="op_weather4_twoicon_date").get_text().strip()
            C = weather_info.find(
                class_="op_weather4_twoicon_temp").get_text().strip()
            rain_or_not = weather_info.find(
                class_="op_weather4_twoicon_weath").get_text().strip()
            wind = weather_info.find(
                class_="op_weather4_twoicon_wind").get_text().strip()
            ans = "{}\t{}\t{}\t{}".format(date, C, rain_or_not, wind)
            answer.append(ans)
            # ...then the forecast for the following days.
            weather_list = results.find_all(
                class_="op_weather4_twoicon_day OP_LOG_LINK")
            for weather_info in weather_list:
                date = weather_info.find(
                    class_="op_weather4_twoicon_date_day").get_text().strip()
                C = weather_info.find(
                    class_="op_weather4_twoicon_temp").get_text().strip()
                rain_or_not = weather_info.find(
                    class_="op_weather4_twoicon_weath").get_text().strip()
                wind = weather_info.find(
                    class_="op_weather4_twoicon_wind").get_text().strip()
                ans = "{}\t{}\t{}\t{}".format(date, C, rain_or_not, wind)
                answer.append(ans)
            flag = 1
            break
        else:
            # No weather card in this result block.
            pass

        # Classical-poetry card.
        if 'mu' in results.attrs:
            r = results.find(class_="op_exactqa_detail_s_answer")
            if r == None:
                pass
            else:
                answer.append(r.get_text().strip())
                flag = 1
                break

        # Calendar / date card (only checked on the first result).
        if 'mu' in results.attrs and i == 1 and results.attrs[
                'mu'].__contains__('http://open.baidu.com/calendar'):
            r = results.find(class_="op-calendar-content")
            if r == None:
                pass
            else:
                answer.append(r.get_text().strip().replace("\n", "").replace(
                    " ", ""))
                flag = 1
                break

        # (A handler for the new-style calendar card was fully
        #  commented out here; removed as dead code.)

        # Calculator card.
        if 'mu' in results.attrs and results.attrs['mu'].__contains__(
                'http://open.baidu.com/static/calculator/calculator.html'):
            r = results.find(class_="op_new_val_screen_result")
            if r == None:
                pass
            else:
                answer.append(r.get_text().strip())
                flag = 1
                break

        # (Legacy Baidu-Zhidao answer scraping was fully commented out
        #  here; removed as dead code.)

        # Baidu Baike (encyclopedia) summary.
        # NOTE(review): results.find("h3") may be None, which would raise
        # AttributeError — confirm every result block has an <h3>.
        link = results.find("h3").find("a")
        if link is not None and link.get_text().__contains__("百度百科"):
            url = results.find("h3").find("a")['href']
            if url == None:
                continue
            else:
                baike_soup = To.get_html_baike(url)
                r = baike_soup.find(class_='lemma-summary')
                if r == None:
                    continue
                else:
                    r = r.get_text().replace("\n", "").strip()
                    answer.append(r)
                    flag = 1
                    break

        # NOTE(review): snippet text is appended a second time per
        # iteration (see the += near the top of the loop) — looks
        # unintentional; harmless since `text` is unused below.
        text += results.get_text()

    if flag == 1:
        return answer

    # Fall back to Bing: check Bing's knowledge panel first.
    soup_bing = To.get_html_bing('https://www.bing.com/search?q=' +
                                 quote(query))
    bingbaike = soup_bing.find(class_="bm_box")
    if bingbaike != None:
        if bingbaike.find_all(class_="b_vList")[1] != None:
            if bingbaike.find_all(class_="b_vList")[1].find("li") != None:
                flag = 1
                answer.append(bingbaike.get_text())
                return answer
    else:
        # No knowledge panel: look for a Bing Wangdian (必应网典) result.
        results = soup_bing.find(id="b_results")
        bing_list = results.find_all('li')
        for bl in bing_list:
            temp = bl.get_text()
            if temp.__contains__(" - 必应网典"):
                print("查找Bing网典")
                url = bl.find("h2").find("a")['href']
                if url == None:
                    continue
                else:
                    bingwd_soup = To.get_html_bingwd(url)
                    r = bingwd_soup.find(class_='bk_card_desc').find("p")
                    if r == None:
                        continue
                    else:
                        r = r.get_text().replace("\n", "").strip()
                        answer.append(r)
                        flag = 1
                        break
        if flag == 1:
            return answer

    # Neither search engine's knowledge graph produced an answer.
    return answer