class PreProcessResponse(BaseResponse):
    """Response wrapper for the preprocess endpoint.

    Validates the request, then returns the synonym-normalised sentence
    together with its word segmentation.
    """

    name = rcategory.preprocess.value
    _preprocess_check_class = PreprocessCheck()
    _preprocess_class = Preprocess()

    def __init__(self):
        super(PreProcessResponse, self).__init__()

    def _preprocess(self, str_in):
        # The shared preprocessor yields (normalised sentence, segmentation).
        normalised, segmented = self._preprocess_class.process(str_in)
        return {'str_synonym': normalised, 'words_segment': segmented}

    def _check(self, input):
        # Delegate validation; returns (ok, message).
        return self._preprocess_check_class.check(input)

    def _response(self, input):
        """Build the response dict: status 1 + data on success, status 0 + msg otherwise."""
        result = copy.deepcopy(self._data_return)
        ok, msg = self._check(input)
        if not ok:
            result['status'] = 0
            result['msg'] = msg
            return result
        result['status'] = 1
        result['data'] = self._preprocess(input.question)
        return result
class Action2(ActionBase):
    """Inherit only the domain slot from context."""

    name = PatternClassificationMapNames.only_inherit_domain.value
    # _slots_maintain_class = SlotsMaintain()
    _prounoun_class = PronounDeal()
    _preprocess_class = Preprocess()

    def __init__(self):
        super(Action2, self).__init__()

    def action(self, sessionid_in):
        # If the current sentence lacks a domain, inherit it from context;
        # a pronoun also triggers domain inheritance; a sentence that already
        # carries a domain inherits nothing else.
        pronoun_words, fill_words, _cur_entities = self._prounoun_class.replace_words(
            sessionid_in)
        domain_only = {cname.domain.value: fill_words[cname.domain.value]}
        return pronoun_words, domain_only
class InteractionBase(object):
    """Abstract base for dialogue-interaction handlers.

    Subclasses decide whether an interaction is needed for the current turn
    and, if so, carry it out.
    """

    _entity_class = Entity()
    _preprocess_class = Preprocess()

    def __init__(self):
        pass

    @abc.abstractmethod
    def judge_interaction(self, sessionid_in, label2entities_cur_dict_in, str_in):
        """Decide whether an interaction is required for this turn."""
        pass

    @abc.abstractmethod
    def deal_interaction(self, sessionid_in, label2entities_cur_dict_in, str_in):
        """Perform the interaction for this turn."""
        pass
class RewriteResponse(BaseResponse):
    """Response wrapper for the rewrite endpoint.

    Validates the request, registers the raw question on the session user,
    then rewrites the question using dialogue context.
    """

    name = rcategory.rewrite.value
    _rewrite_check_class = RewriteCheck()
    _preprocess_class = Preprocess()
    _nerdeal_class = NerDeal()
    _usermanager = usermanager

    def __init__(self):
        super(RewriteResponse, self).__init__()

    def _rewrite(self, sessionid_in):
        # Context rewrite returns six values; only four are surfaced here.
        (words_with_ctx, fill_words, cur_entities,
         flag_inter, abbrev_str, abbrev2std_list) = self._nerdeal_class.replace_with_context(
            sessionid_in)
        return {
            'words_with_context': words_with_ctx,
            'words_with_context_notin_curstr': fill_words,
            'label2entities_cur_dict': cur_entities,
            'abbrev2stds_dict': abbrev2std_list
        }

    def _check(self, input):
        # Delegate validation; returns (ok, message).
        return self._rewrite_check_class.check(input)

    def _response(self, input):
        """Build the response dict: status 1 + rewrite data on success, status 0 + msg otherwise."""
        result = copy.deepcopy(self._data_return)
        ok, msg = self._check(input)
        if not ok:
            result['status'] = 0
            result['msg'] = msg
            return result
        result['status'] = 1
        # Ensure a user object exists for this session, then store the raw
        # question (which refreshes the derived synonym fields).
        user_cur = self._usermanager.create(input.jdpin, input.session)
        usermanager.update(input.session, str_raw=input.question)
        result['data'] = self._rewrite(sessionid_in=input.session)
        result['data']['str_synonym'] = user_cur['str_synonym']
        return result
class UserManager(object, metaclass=Singleton):
    """Singleton registry mapping session ids to User objects."""

    _preprocess_class = Preprocess()
    _users_containers = {}

    def __init__(self):
        pass

    def create(self, jdpin_in, session_id_in):
        """Return the session's user, registering a fresh one if absent."""
        if session_id_in not in self._users_containers:
            fresh = User()
            fresh['jdpin_id'] = jdpin_in
            fresh['session_id'] = session_id_in
            self._users_containers[session_id_in] = fresh
        return self._users_containers[session_id_in]

    def update(self, session_id, **kwargs):
        """Set the given fields on the session's user.

        Setting `str_raw` also refreshes the derived synonym/segmentation
        fields; setting `str_synonym` refreshes its segmentation.
        """
        target = self._users_containers[session_id]
        for field, value in kwargs.items():
            if field in target.__dict__:
                target[field] = value
            if field == 'str_raw':
                synonym_str, synonym_cut = self._preprocess_class.process(value)
                target['str_synonym'] = synonym_str
                target['str_synonym_cut'] = synonym_cut
                target['str_raw_cut'] = jieba.lcut(value)
            if field == 'str_synonym':
                target['str_synonym'] = value
                target['str_synonym_cut'] = jieba.lcut(value)

    def delete(self, session_id):
        """Drop the session's user, if present."""
        if session_id in self._users_containers:
            self._users_containers.pop(session_id)

    def get_user(self, session_id_in):
        """Return the session's user; raises KeyError if unknown."""
        return self._users_containers[session_id_in]
class Action3(ActionBase):
    """Normal inheritance action for the common sentence-pattern class."""
    name = PatternClassificationMapNames.common_classification.value
    _prounoun_class = PronounDeal()
    # _slots_maintain_class = SlotsMaintain()
    _similarmodel_class = SimilarModelSents()
    _preprocess_class = Preprocess()
    _depend_analysis_class = DependAnalysis()

    def __init__(self):
        super(Action3, self).__init__()

    def action_accurate(self, str_in, words_list_in, jdpin_in):
        # Exact path: pronoun replacement only, no fuzzy recall.
        src_prounoun_word, dst_word_to_fufill, label2entities_cur_dict = self._prounoun_class.replace_words(
            str_in, words_list_in, jdpin_in)
        return src_prounoun_word, dst_word_to_fufill, str_in

    def action_with_fuzzy(self, sessionid_in):
        # Synonym-sentence replacement: first inspect the entity words of the
        # current sentence, then decide whether to fall back to fuzzy matching.
        user_cur = self._usermanager.get_user(sessionid_in)
        src_prounoun_word, dst_word_to_fufill, label2entities_cur_dict = \
            self._prounoun_class.replace_words(sessionid_in)
        property_value = label2entities_cur_dict[cname.property.value]
        domain_list = label2entities_cur_dict[cname.domain.value]
        if not property_value:
            # No property slot found: strip the domain word (if any) and run
            # similarity recall on the remainder.
            if domain_list:
                domain_str = domain_list[0]
                str_no_domain = user_cur['str_synonym'].replace(domain_str, '')
            else:
                domain_str = ''
                str_no_domain = user_cur['str_synonym']
            strs_set, scores_set = self._similarmodel_class.similar_threshold(
                str_no_domain)
            if strs_set:
                # print('recall similarity===>',words_set)
                mylog.info('similar_threshold str_no_domain\t\t{}'.format(
                    str(str_no_domain)))
                mylog.info('similar_threshold words_set\t\t{}'.format(
                    str(strs_set)))
                mylog.info('similar_threshold scores_set\t\t{}'.format(
                    str(scores_set)))
                # Take the best recalled sentence, re-attach the domain word,
                # re-normalise it, store it on the user, and redo pronoun
                # replacement on the updated session state.
                str_similar = strs_set[0]
                # str_similar = ''.join(words_similar)
                str_similar_with_domain = domain_str + str_similar
                str_synonym, str_synonym_words = self._preprocess_class.process(
                    str_similar_with_domain)
                self._usermanager.update(sessionid_in,
                                         str_synonym=str_synonym,
                                         str_synonym_cut=str_synonym_words)
                src_prounoun_word, dst_word_to_fufill, label2entities_cur_dict = self._prounoun_class.replace_words(
                    sessionid_in)
            else:
                # if '是多久' in str_no_domain: #
                print('str_no_domain==>',str_no_domain)
                # No recall: if dependency analysis judges the sentence
                # complete, keep only the domain slot for filling.
                if self._depend_analysis_class.judge_complete(str_no_domain):
                    dst_word_to_fufill = {
                        cname.domain.value: dst_word_to_fufill[cname.domain.value]
                    }
                else:
                    pass
        else:
            pass
        return src_prounoun_word, dst_word_to_fufill

    def action(self, sessionid_in):
        # sentence_synonym, words_jieba = self._preprocess_class.process(str_in)
        return self.action_with_fuzzy(sessionid_in)
if __name__ == "__main__":
    # Manual smoke test: seed a few QA pairs into the chat-record store.
    # NOTE(review): the script appears truncated — the last `sententc`
    # assignment is never used; confirm against the full file.
    # setUserInfo('1234', '安行万里的投保年龄', '123')
    # setUserInfo('1234', '安行万里的保障地区', '123')
    # setUserInfo('1234', '安行万里的保险事故通知有哪些', '123')
    # setUserInfo('1234', '123', '123')
    # r=getUserInfo('1234')
    # print(r)
    import sys
    sys.path.append('../../')
    # from online.nlu.slots_maintain import SlotsMaintain
    from online.nlu.preprocess import Preprocess
    from online.nlu.query_rewrite import NerDeal
    preprocess_class = Preprocess()
    nerdeal_class = NerDeal()
    # slots_maintain_class = SlotsMaintain()
    jdpin = '12345'
    sententc = '安行万里的投保年龄'
    answer = '没问题'
    setQAPairs(jdpin, sententc, answer, 1)
    # print(rds.keys())
    sententc = '安行万里的保障地区'
    answer = '没问题'
    setQAPairs(jdpin, sententc, answer, 1)
    sententc = '安行万里的保险事故通知有哪些'
    answer = '没问题'
    setQAPairs(jdpin, sententc, answer, 1)
    sententc = '目的地填写错误怎么办'
class SimilarModelBase(object):
    """Base class for sentence-similarity models.

    Loads a stopword list and builds n-gram representations of a
    standard-sentence -> similar-sentences map.
    """

    # NOTE(review): Python-2 style metaclass declaration — has no effect on
    # Python 3, so @abc.abstractmethod below is not enforced; confirm intent
    # before changing, since the abstract name is also name-mangled.
    __metaclass__ = ABCMeta
    _preprocess_class = Preprocess()
    _ngram_num = 1

    def __init__(self):
        self._preload()

    def _preload(self):
        # Cache the stopword list once per instance.
        self._stopwords = self._preload_stopwords()

    def _preload_stopwords(self):
        """Load and flatten the stopword list from the configured file.

        Returns:
            list: flattened stopwords.
        """
        stopwords_list = readfile_line2list(PathUtil().stopwords_filepath)
        return list(itertools.chain.from_iterable(stopwords_list))

    @abc.abstractmethod
    def __load_similar_sents(self):
        """Load the synonym-sentence map; subclasses must override."""
        raise ValueError('要继承复写')

    def _filter_stopwords(self, words_in):
        """Pass the word list through unchanged.

        Stopword filtering is currently DISABLED: the original body returned
        the input immediately, leaving an unreachable loop that referenced an
        undefined `words_ret` (a NameError had it ever run). The dead code is
        removed here; the pass-through behavior is unchanged.
        """
        return words_in

    def _get_words_freq(self, sentences_map_in):
        """Count word frequencies over all keys and values of the map.

        Args:
            sentences_map_in: mapping of standard sentence -> list of
                similar sentences.

        Returns:
            list of (word, count) tuples sorted by count, descending.
        """
        words_freq_dict = collections.defaultdict(int)
        for key, values in sentences_map_in.items():
            for word_iter in self._preprocess_class.cut_words(key):
                words_freq_dict[word_iter] += 1
            for value_iter in values:
                for word_iter in self._preprocess_class.cut_words(value_iter):
                    words_freq_dict[word_iter] += 1
        return sorted(words_freq_dict.items(),
                      key=lambda x: int(x[1]),
                      reverse=True)

    def _get_n_gram_row(self, sentence_in, n):
        """Cut one sentence, filter stopwords, build its n-gram combinations.

        Returns:
            (combinations, filtered_words): list of n-word tuples and the
            filtered word list they were built from.
        """
        words_key_iter = self._preprocess_class.cut_words(sentence_in)
        words_key_filtered = self._filter_stopwords(words_key_iter)
        words_key_combinate = list(
            itertools.combinations(words_key_filtered, r=n))
        return words_key_combinate, words_key_filtered

    def _get_n_gram(self, sentences_map_in, n):
        """Build n-gram combinations for every sentence group in the map.

        Returns:
            words_std_and_similar_list: per-group list of n-gram lists
                (standard sentence first, then each similar sentence),
            lengths: per-group total count of filtered words,
            sentences: per-group [standard, *similars] sentence lists.
        """
        words_std_and_similar_list = list()
        lengths = []
        sentences = list()
        for key, values in sentences_map_in.items():
            length_raw_words = 0
            words_std_and_similar_list_iter = list()
            words_key_combinate, words_key_filtered = self._get_n_gram_row(
                key, n)
            words_std_and_similar_list_iter.append(words_key_combinate)
            length_raw_words += len(words_key_filtered)
            for value_iter in values:
                words_value_combinate, words_value_filtered = self._get_n_gram_row(
                    value_iter, n)
                length_raw_words += len(words_value_filtered)
                words_std_and_similar_list_iter.append(words_value_combinate)
            words_std_and_similar_list.append(words_std_and_similar_list_iter)
            lengths.append(length_raw_words)
            sentences.append([key, *values])
        return words_std_and_similar_list, lengths, sentences
class NerDeal(object):
    """Context-aware query rewriting.

    Classifies the sentence pattern, resolves pronouns and slots from
    dialogue history, and triggers fuzzy interaction when needed.
    """
    _prounoun_class = PronounDeal()
    _sentenceclassification_class = SentenceClassification()
    _fuzzy_interaction_class = FuzzyInterAction()
    _preprocess_class = Preprocess()
    _usermanager_class = usermanager
    _domain_classification_class = DomainClassification()
    _classification2action_class = Classification2Action()

    def __init__(self):
        pass

    def _pattern_classification(self, sentence_in):
        # Sentence-pattern classification delegate.
        return self._sentenceclassification_class.classification(sentence_in)

    def replace_words_list(self, words_list_in, src_prounoun_word,
                           dst_word_to_fufill):
        # Replace each pronoun in the word list with one entity popped from
        # the context slots, then append any leftover context entities at the
        # end. Both inputs are deep-copied so callers' data is untouched.
        words_list_out = copy.deepcopy(words_list_in)
        dst_word_out = copy.deepcopy(dst_word_to_fufill)
        if src_prounoun_word:
            for src_word in src_prounoun_word:
                index_pronoun = words_list_out.index(src_word)
                # Use the first non-empty slot label; pop consumes the entity.
                for label, words_iter in dst_word_out.items():
                    if words_iter:
                        entity_to_be_repl = words_iter.pop()
                        words_list_out[index_pronoun] = entity_to_be_repl
                        break
                    else:
                        pass
        for key, value in dst_word_out.items():
            words_list_out.extend(value)
        return words_list_out

    def _get_last_questionbak(self, jdpin_in):
        # Backup variant: read the last QA pair from the chat-record store.
        chatRecordList = getQAPairs(jdpin_in)
        if chatRecordList:
            last_question = chatRecordList[-1].question
            last_answer = chatRecordList[-1].answer
        else:
            last_question, last_answer = '', ''
        return last_question, last_answer

    def _get_entity_from_last_answer(self, str_in):
        # Mine entities from a single answer string across all domains.
        return self._prounoun_class.get_entity_from_strs([str_in],
                                                         dcnames.alldomain.value)

    def _deal_last_question(self, session_id_in):
        # If the previous turn was a product query, mine entities from its
        # answer and attach them to the latest slot record.
        last_question, last_answer, last_sentence_pattern_classification = _get_last_question(session_id_in)
        classification = self._pattern_classification(last_question)
        if classification == classification_name.query_product.value:
            label2entities_cur_dict, words_timestap = self._get_entity_from_last_answer(last_answer)
            # self._slots_maintain_class.update_slots(label2entities_cur_dict, session_id_in)
            usermanager.get_user(session_id_in).update_slots(
                -1, **{usname.slots_answer.value: label2entities_cur_dict})
        else:
            pass

    def _classification_sentence_pattern(self, sessionid_in, str_in):
        # Classify the sentence pattern; for the bottom no-inherit bucket,
        # retry on the synonym-normalised string. In the online environment a
        # KG-provided pattern (if any) overrides the model result.
        user_cur = usermanager.get_user(sessionid_in)
        classification = self._pattern_classification(str_in)
        if classification == classification_name.bottom_no_inherit.value:
            str_synonym, _ = self._preprocess_class.process(str_in)
            classification = self._pattern_classification(str_synonym)
        if _environ == 'online':
            sentence_pattern_classification_from_kg = _get_sentence_pattern_from_kg(
                sessionid_in, user_cur['str_raw'])
            if sentence_pattern_classification_from_kg:
                # user_cur['sentence_pattern_classification'] = sentence_pattern_classification_from_kg
                classification = sentence_pattern_classification_from_kg
            else:
                pass
        return classification

    def _get_prounoun_and_dst_words(self, sessionid_in):
        # Dispatch to the action chosen for the session's classification.
        return self._classification2action_class.act(sessionid_in)

    def replace_with_context_only_question(self, sessionid_in):
        """Rewrite the current question using dialogue history.

        Extracts entities from past turns and, depending on whether the
        current string contains a pronoun, decides whether entities are
        appended or substituted.

        Args:
            sessionid_in: session id; the user's raw/synonym strings and
                their segmentations are read from the session user.

        Returns:
            words_list_out: word list after context completion.
            dst_word_to_fufill: slots extracted from dialogue history.
            label2entities_cur_dict: slots extracted from the current
                utterance, ready to be stored.
        """
        user_cur = self._usermanager_class.get_user(sessionid_in)
        sentence_classification = self._classification_sentence_pattern(
            sessionid_in, user_cur['str_raw'])
        mylog.info('sentence_classification:\t\t{}\tstr_raw\t{}'.format(
            sentence_classification, user_cur['str_raw']))
        self._usermanager_class.update(
            sessionid_in,
            sentence_pattern_classification=sentence_classification)
        src_prounoun_word, dst_word_to_fufill = self._get_prounoun_and_dst_words(sessionid_in)
        label2entities_cur_dict, words_timestap = self._prounoun_class.get_entity_from_strs(
            [user_cur['str_synonym']], user_cur['domain_classification'])
        words_list_out = self.replace_words_list(user_cur['str_synonym_cut'],
                                                 src_prounoun_word,
                                                 dst_word_to_fufill)
        usermanager.update(sessionid_in,
                           **{'dst_word_to_fufill': dst_word_to_fufill,
                              'label2entities_cur_dict': label2entities_cur_dict})
        return words_list_out, dst_word_to_fufill, label2entities_cur_dict

    def replace_with_context(self, sessionid_in):
        """Full rewrite pipeline for one turn.

        Mines the previous answer, classifies the domain, rewrites the
        question with context, runs the fuzzy-interaction check, and persists
        the slot record for this turn.

        Returns:
            (words_list_out, dst_word_to_fufill, label2entities_cur_dict,
             flag_inter, abbrev_str, abbrev2std_list)
        """
        self._deal_last_question(sessionid_in)  # todo 添加分类
        user_cur = usermanager.get_user(sessionid_in)
        # todo 添加当前槽位切换
        domain_classification = self._domain_classification_class.classify(
            user_cur['str_synonym'])
        mylog.info('domain_classification:\t\t{}'.format(domain_classification))
        usermanager.update(sessionid_in,
                           domain_classification=domain_classification)
        # usermanager.get_user(sessionid_in).update_slots(index=-1,sentence_pattern_classification=sentence_pattern_classification_from_kg)
        # self._usermanager_class.update(sessionid_in,{'str_raw':str_in,'str_synonym':str_synonym,
        # 'domain_classification':domain_classification,'str_synonym_cut':str_synonym_cut})
        words_list_out, dst_word_to_fufill, label2entities_cur_dict = self.replace_with_context_only_question(
            sessionid_in)
        # print('words_list_out, dst_word_to_fufill, label2entities_cur_dict==>',words_list_out, dst_word_to_fufill, label2entities_cur_dict)
        flag_inter, abbrev_str, abbrev2std_list = self._fuzzy_interaction_class.deal_fuzzy_interaction(
            sessionid_in, label2entities_cur_dict, user_cur['str_raw'],
            words_list_out)
        # Persist this turn's slot record on the session user.
        usermanager.get_user(sessionid_in).insert_slots(
            **{'question': user_cur['str_synonym'],
               'slots_question': label2entities_cur_dict,
               'domain_classification': user_cur['domain_classification'],
               'sentence_pattern_classification': user_cur['sentence_pattern_classification']})
        # Reset the recommendation-pattern flag for this user in redis.
        rds.set(user_cur['jdpin_id'] + _flag_recomm_sent_pattern, 0)
        return words_list_out, dst_word_to_fufill, label2entities_cur_dict, flag_inter, abbrev_str, abbrev2std_list