Exemple #1
0
    def __init__(self, corpus_file, word2id):
        """
        Load the corpus and build the TF-IDF similarity index.

        :param corpus_file: corpus path handed to ``load_corpus_file``; also
            named in the timing log line.
        :param word2id: passed through unchanged to ``load_corpus_file``.
        """
        time_s = time.time()
        self.contexts, self.responses = load_corpus_file(corpus_file,
                                                         word2id,
                                                         size=50000)

        # NOTE(review): _train_model() is presumably what populates
        # self.tfidf_model and self.corpus used below -- confirm.
        self._train_model()
        self.corpus_mm = self.tfidf_model[self.corpus]
        self.index = similarities.MatrixSimilarity(self.corpus_mm)
        # "%.2f" reports the elapsed time with two decimals; the previous
        # "%2.f" meant "width 2, zero decimals" and dropped the fraction.
        logger.debug("Time to build tfidf model by %s: %.2f seconds." %
                     (corpus_file, time.time() - time_s))
Exemple #2
0
 def search(self, sqls):
     """
     Run the Cypher queries and return the formatted answers.

     :param sqls: iterable of dicts, each providing a 'question_type' entry
         and an 'sql' entry (an iterable of queries given to ``self.g.run``)
     :return: list of the non-empty answers built by ``answer_prettify``
     """
     logger.debug("input sqls: %s" % sqls)
     final_answers = []
     for entry in sqls:
         question_type, queries = entry['question_type'], entry['sql']
         # Pool the records of every query that belongs to this question.
         records = []
         for cypher in queries:
             records.extend(self.g.run(cypher).data())
         pretty = self.answer_prettify(question_type, records)
         if not pretty:
             continue
         final_answers.append(pretty)
     logger.debug("cypher result: %s" % final_answers)
     return final_answers
Exemple #3
0
 def answer(self, query):
     """
     Reply to ``query`` using ``self.m.predict`` and record the exchange.

     Both the query and the reply are appended to ``self.last_txt``.

     :param query: input text handed to the model
     :return: whatever ``self.m.predict`` returns for the query
     """
     separator = '-' * 20
     self.last_txt.append(query)
     logger.debug(separator)
     logger.debug("init_query=%s" % query)
     reply = self.m.predict(query)
     logger.debug("seq2seq_response=%s" % reply)
     self.last_txt.append(reply)
     return reply
Exemple #4
0
    def search_bing(query):
        """
        Look up an answer on Bing: the knowledge-graph box first, otherwise
        the Bing encyclopedia (必应网典) entries among the organic results.

        :param query: question text; URL-quoted and appended to
            ``bing_url_prefix``
        :return: tuple ``(answer, left_text)``. ``answer`` is a list holding
            at most one answer string. ``left_text`` is the text of the
            organic results container when it was scanned without a hit,
            otherwise ''.
        """
        answer = []
        left_text = ''
        # Fetch the Bing result page.
        soup_bing = html_crawler.get_html_bing(bing_url_prefix + quote(query))
        # NOTE(review): assumes a failed fetch yields a falsy value, as the
        # Baidu counterpart checks for -- confirm against html_crawler.
        if not soup_bing:
            return answer, left_text

        # Knowledge-graph box: when present, the organic results are not used.
        kg_box = soup_bing.find(class_="bm_box")
        if kg_box:
            v_lists = kg_box.find_all(class_="b_vList")
            if len(v_lists) > 1:
                first_item = v_lists[1].find("li")
                text = first_item.get_text().strip() if first_item else ''
                if text:
                    answer.append(text)
                    logger.debug("Bing知识图谱找到答案")
            return answer, left_text

        results = soup_bing.find(id="b_results")
        if not results:
            return answer, left_text

        for item in results.find_all('li'):
            if " - 必应网典" not in item.get_text():
                continue
            logger.debug("查找Bing网典")
            heading = item.find("h2")
            link = heading.find("a") if heading else None
            url = link.get('href') if link else None
            if not url:
                continue
            bingwd_soup = html_crawler.get_html_bingwd(url)
            card = bingwd_soup.find(class_='bk_card_desc') if bingwd_soup else None
            paragraph = card.find("p") if card else None
            if not paragraph:
                continue
            text = paragraph.get_text().replace("\n", "").strip()
            if text:
                logger.debug("Bing网典找到答案")
                answer.append(text)
                return answer, left_text

        # Use the results container itself here: the loop above keeps its own
        # variables, so this can no longer hit None or a plain string.
        left_text += results.get_text()
        return answer, left_text
Exemple #5
0
    def search_baidu(self, query):
        """
        Look up an answer on Baidu.

        For the first result the following cards are tried in order:
        knowledge graph, poetry, calendar (old and new layout), calculator,
        weather and the Zhidao best-answer link. For the first two results a
        Zhidao or Baike page linked from the result title is also tried.

        :param query: question text; URL-quoted and appended to
            ``baidu_url_prefix``
        :return: tuple ``(answer, left_text)``. ``answer`` is a list holding
            at most one answer string. ``left_text`` is the concatenated text
            of the results that were scanned without producing an answer.
        """
        answer = []
        left_text = ''

        def zhidao_best_answer(url):
            """Return the best-answer text of a Zhidao page, or None if absent."""
            zhidao_soup = html_crawler.get_html_zhidao(url)
            box = zhidao_soup.find(class_='bd answer') if zhidao_soup else None
            if not box:
                return None
            node = box.find('pre')
            if not node:
                content = box.find(class_='line content')
                node = content.find(class_="best-text mb-10") if content else None
            if not node:
                return None
            return node.get_text().strip().replace("展开全部", "").strip()

        # Fetch the Baidu result page.
        soup_baidu = html_crawler.get_html_baidu(baidu_url_prefix + quote(query))
        if not soup_baidu:
            return answer, left_text
        # NOTE(review): range(1, topk) visits result ids 1..topk-1 -- confirm
        # that topk is meant to be exclusive.
        for i in range(1, self.topk):
            items = soup_baidu.find(id=i)
            if not items:
                logger.debug("百度找不到答案")
                break

            first = i == 1
            has_mu = first and 'mu' in items.attrs
            mu = items.attrs['mu'] if has_mu else ''

            # Knowledge-graph card: a hit on the first result is the answer.
            if has_mu:
                r = items.find(class_='op_exactqa_s_answer')
                if r:
                    logger.debug("百度知识图谱找到答案")
                    answer.append(r.get_text().strip())
                    return answer, left_text

            # Classical poetry card.
            if has_mu:
                r = items.find(class_="op_exactqa_detail_s_answer")
                if r:
                    logger.debug("百度诗词找到答案")
                    answer.append(r.get_text().strip())
                    return answer, left_text

            # Calendar / date card (old layout).
            if has_mu and calendar_url in mu:
                r = items.find(class_="op-calendar-content")
                if r:
                    logger.debug("百度万年历找到答案")
                    answer.append(r.get_text().strip().replace("\n", "").replace(" ", ""))
                    return answer, left_text

            # Calendar card (new layout): the answer is carried in the 'fk' attribute.
            if first and 'calendar_new' in items.attrs.get('tpl', ''):
                r = items.attrs.get('fk', '').replace("6018_", "")
                logger.debug(r)
                if r:
                    logger.debug("百度万年历新版找到答案")
                    answer.append(r)
                    return answer, left_text

            # Calculator card.
            if has_mu and calculator_url in mu:
                r = items.find(class_="op_new_val_screen_result")
                if r:
                    logger.debug("计算器找到答案")
                    answer.append(r.get_text().strip())
                    return answer, left_text

            # Weather card.
            if has_mu and weather_url in mu:
                r = items.find(class_="op_weather4_twoicon_today")
                if r:
                    logger.debug("天气找到答案")
                    answer.append(r.get_text().replace('\n', '').strip())
                    return answer, left_text

            # Zhidao best-answer link embedded in the first result.
            if has_mu:
                r = items.find(class_='op_best_answer_question_link')
                url = r.get('href') if r else None
                if url:
                    text = zhidao_best_answer(url)
                    if text is not None:
                        logger.debug("百度知道找到答案")
                        answer.append(text)
                        return answer, left_text

            # Title links of the first two results: Zhidao, then Baike.
            title = items.find("h3")
            title_link = title.find("a") if title else None
            if title_link and i in (1, 2):
                title_text = title_link.get_text()
                url = title_link.get('href')

                if "百度知道" in title_text and url:
                    text = zhidao_best_answer(url)
                    if text is not None:
                        logger.debug("百度知道找到答案")
                        answer.append(text)
                        return answer, left_text

                if "百度百科" in title_text and url:
                    logger.debug("百度百科找到答案")
                    baike_soup = html_crawler.get_html_baike(url)
                    r = baike_soup.find(class_='lemma-summary') if baike_soup else None
                    if r:
                        answer.append(r.get_text().replace("\n", "").strip())
                        return answer, left_text

            left_text += items.get_text()
        return answer, left_text
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
import sys
sys.path.append("..")
from dialogbot.searchdialog.internet.search_engine import Engine
from dialogbot.utils.logger import logger

if __name__ == '__main__':
    engine = Engine()
    # The questions are sent, in this fixed order, to one Engine instance.
    questions = (
        "北京今天天气如何?",
        "上海呢?",
        "武汉呢?",
        "武汉明天呢?",
        "貂蝉是谁",
        "西施是谁",
        "你知道我是谁",
    )
    for question in questions:
        logger.debug(engine.search(question))
    # Finally dump whatever the engine exposes as `contents`.
    print(engine.contents)
Exemple #7
0
 def __init__(self, corpus_file, word2id):
     """
     Load the contexts and responses used by the one-hot model.

     :param corpus_file: corpus path handed to ``load_corpus_file``; also
         named in the timing log line.
     :param word2id: passed through unchanged to ``load_corpus_file``.
     """
     time_s = time.time()
     self.contexts, self.responses = load_corpus_file(corpus_file, word2id)
     # "%.2f" reports the elapsed time with two decimals; the previous
     # "%2.f" meant "width 2, zero decimals" and dropped the fraction.
     logger.debug("Time to build onehot model by %s : %.2f seconds." %
                  (corpus_file, time.time() - time_s))
Exemple #8
0
    def answer_prettify(self, question_type, answers):
        """
        Render query results with the reply template for ``question_type``.

        Listed values are de-duplicated in first-seen order and cut to
        ``self.num_limit`` entries, so the same records always give the same
        text.

        :param question_type: question type that selects the template
        :param answers: list of record dicts keyed by returned column
            (e.g. 'm.name', 'n.name')
        :return: formatted answer string; '' when ``answers`` is empty or the
            question type has no template
        """
        final_answer = ""
        if not answers:
            return final_answer
        logger.debug("original answer: %s" % answers)

        def join_unique(values):
            # dict.fromkeys drops duplicates but, unlike set(), keeps the
            # first-seen order, so the truncated list is deterministic.
            return ';'.join(list(dict.fromkeys(values))[:self.num_limit])

        # question type -> (column that is listed, column naming the subject,
        #                   template)
        templates = {
            'disease_symptom': ('n.name', 'm.name', '{subject}的症状包括:{items}'),
            'symptom_disease': ('m.name', 'n.name', '症状{subject}可能染上的疾病有:{items}'),
            'disease_cause': ('m.cause', 'm.name', '{subject}可能的成因有:{items}'),
            'disease_prevent': ('m.prevent', 'm.name', '{subject}的预防措施包括:{items}'),
            'disease_lasttime': ('m.cure_lasttime', 'm.name', '{subject}治疗可能持续的周期为:{items}'),
            'disease_cureprob': ('m.cured_prob', 'm.name', '{subject}治愈的概率为(仅供参考):{items}'),
            'disease_easyget': ('m.easy_get', 'm.name', '{subject}的易感人群包括:{items}'),
            'disease_desc': ('m.desc', 'm.name', '{subject},熟悉一下:{items}'),
            'disease_not_food': ('n.name', 'm.name', '{subject}忌食的食物包括有:{items}'),
            'food_not_disease': ('m.name', 'n.name', '患有{items}的人最好不要吃{subject}'),
            'food_do_disease': ('m.name', 'n.name', '患有{items}的人建议多试试{subject}'),
            'disease_drug': ('n.name', 'm.name', '{subject}通常的使用的药品包括:{items}'),
            'drug_disease': ('m.name', 'n.name', '{subject}主治的疾病有{items},可以试试'),
            'disease_check': ('n.name', 'm.name', '{subject}通常可以通过以下方式检查出来:{items}'),
            'check_disease': ('m.name', 'n.name', '通常可以通过{subject}检查出来的疾病有{items}'),
        }

        if question_type in templates:
            item_key, subject_key, template = templates[question_type]
            items = [record[item_key] for record in answers]
            final_answer = template.format(
                subject=answers[0][subject_key], items=join_unique(items))

        elif question_type == 'disease_cureway':
            # Each record carries a list of treatments; flatten it to one string.
            items = [';'.join(record['m.cure_way']) for record in answers]
            final_answer = '{0}可以尝试如下治疗:{1}'.format(
                answers[0]['m.name'], join_unique(items))

        elif question_type == 'disease_acompany':
            subject = answers[0]['m.name']
            names = ([record['n.name'] for record in answers] +
                     [record['m.name'] for record in answers])
            # NOTE(review): this template reuses the symptom wording although
            # the type is about accompanying diseases -- confirm the text.
            final_answer = '{0}的症状包括:{1}'.format(
                subject, join_unique(name for name in names if name != subject))

        elif question_type == 'disease_do_food':
            do_desc = [record['n.name'] for record in answers
                       if record['r.name'] == '宜吃']
            recommand_desc = [record['n.name'] for record in answers
                              if record['r.name'] == '推荐食谱']
            final_answer = '{0}宜食的食物包括有:{1}\n推荐食谱包括有:{2}'.format(
                answers[0]['m.name'], join_unique(do_desc),
                join_unique(recommand_desc))

        logger.debug("apply template answer: %s" % final_answer)
        return final_answer
Exemple #9
0
    def parser(self, res_classify):
        """
        Turn a classification result into the list of queries to execute.

        :param res_classify: dict with an 'args' entry (given to
            ``build_entitydict``) and a 'question_types' entry (iterable of
            question type names)
        :return: list of dicts ``{'question_type': ..., 'sql': ...}``, one per
            recognised question type for which ``sql_transfer`` produced a
            non-empty result
        """
        entity_dict = self.build_entitydict(res_classify['args'])

        # question type -> entity category whose values are given to sql_transfer
        entity_for_type = {
            'disease_symptom': 'disease',
            'symptom_disease': 'symptom',
            'disease_cause': 'disease',
            'disease_acompany': 'disease',
            'disease_not_food': 'disease',
            'disease_do_food': 'disease',
            'food_not_disease': 'food',
            'food_do_disease': 'food',
            'disease_drug': 'disease',
            'drug_disease': 'drug',
            'disease_check': 'disease',
            'check_disease': 'check',
            'disease_prevent': 'disease',
            'disease_lasttime': 'disease',
            'disease_cureway': 'disease',
            'disease_cureprob': 'disease',
            'disease_easyget': 'disease',
            'disease_desc': 'disease',
        }

        sqls = []
        for question_type in res_classify['question_types']:
            entity_key = entity_for_type.get(question_type)
            if entity_key is None:
                # Unrecognised question types produce no query.
                continue
            sql = self.sql_transfer(question_type, entity_dict.get(entity_key))
            if not sql:
                continue
            sql_dict = {'question_type': question_type, 'sql': sql}
            logger.debug("sql dict: %s" % sql_dict)
            sqls.append(sql_dict)

        return sqls
Exemple #10
0
    def classify(self, question):
        """
        Classify a question into zero or more question types.

        :param question: question text
        :return: ``{}`` when ``check_medical`` finds nothing; otherwise a dict
            with 'args' (the ``check_medical`` result) and 'question_types'
            (list of matched type names, in rule order)
        """
        medical_dict = self.check_medical(question)
        logger.debug("medical dict: %s" % medical_dict)
        if not medical_dict:
            return {}

        # Flatten the per-entity type lists into one list of entity types.
        types = [entity_type
                 for type_list in medical_dict.values()
                 for entity_type in type_list]

        # (question words, required entity type, resulting question type).
        # A tuple result means (type when a deny word is present, type otherwise).
        rules = (
            (self.symptom_qwds, 'disease', 'disease_symptom'),
            (self.symptom_qwds, 'symptom', 'symptom_disease'),
            (self.cause_qwds, 'disease', 'disease_cause'),
            (self.acompany_qwds, 'disease', 'disease_acompany'),
            (self.food_qwds, 'disease',
             ('disease_not_food', 'disease_do_food')),
            (self.food_qwds + self.cure_qwds, 'food',
             ('food_not_disease', 'food_do_disease')),
            (self.drug_qwds, 'disease', 'disease_drug'),
            (self.cure_qwds, 'drug', 'drug_disease'),
            (self.check_qwds, 'disease', 'disease_check'),
            (self.check_qwds + self.cure_qwds, 'check', 'check_disease'),
            (self.prevent_qwds, 'disease', 'disease_prevent'),
            (self.lasttime_qwds, 'disease', 'disease_lasttime'),
            (self.cureway_qwds, 'disease', 'disease_cureway'),
            (self.cureprob_qwds, 'disease', 'disease_cureprob'),
            (self.easyget_qwds, 'disease', 'disease_easyget'),
        )

        question_types = []
        for keywords, entity, outcome in rules:
            if not (self.check_words(keywords, question) and entity in types):
                continue
            if isinstance(outcome, tuple):
                denied_type, allowed_type = outcome
                if self.check_words(self.deny_words, question):
                    outcome = denied_type
                else:
                    outcome = allowed_type
            question_types.append(outcome)

        # Nothing matched: fall back to the disease description ...
        if not question_types and 'disease' in types:
            question_types = ['disease_desc']

        # ... or, for a bare symptom, to the diseases that show it.
        if not question_types and 'symptom' in types:
            question_types = ['symptom_disease']

        # Bundle the entities and the matched types into one dict.
        data = {'args': medical_dict, 'question_types': question_types}

        logger.debug("data info: %s" % data)
        return data