Esempio n. 1
0
# coding:utf8
# @Time    : 18-7-9 上午10:29
# @Author  : evilpsycho
# @Mail    : [email protected]
import pysparnn.cluster_index as ci
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.externals.joblib import load, dump

from chatbot.utils.log import get_logger

logger = get_logger("TfidfQA")


class TfidfQA(object):
    '''
    词频-逆文本频率
    '''
    def __init__(self, path=None):
        """Create a fresh TF-IDF QA model or restore one from disk.

        :param path: path prefix of a previously saved model; when given, the
            vectorizer is loaded from ``path + ".tfidf"`` and the cluster
            index from ``path + ".cluster_index"``. When ``None``, a new
            ``TfidfVectorizer`` is created and no index exists yet.
        """
        if path is not None:
            # Restore both persisted artefacts that share the same prefix.
            self._tfidf = load(path + ".tfidf")
            self._ci = load(path + ".cluster_index")
            return

        # No saved model: start with a default vectorizer and no index.
        self._tfidf = TfidfVectorizer()
        self._ci = None

    def fit(self, queries, queries_idx):
        """
Esempio n. 2
0
# coding:utf8
# @Time    : 18-5-14 下午2:13
# @Author  : evilpsycho
# @Mail    : [email protected]
import os
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))

import opencc

from chatbot.utils.log import get_logger
from chatbot.utils.wrapper import time_counter

logger = get_logger("Text Preprocessing T2S")


@time_counter
def traditional2simple(input_path, output_path):
    """繁体转简体

    :param input_path:
    :param output_path:
    :return:
    """
    # input check
    assert os.path.exists(input_path)
    # output check
    path = Path(output_path).resolve()
    path.parent.mkdir(exist_ok=True)
    # convert
Esempio n. 3
0
# @Time    : 5/14/18 22:07
# @Author  : evilpsycho
# @Mail    : [email protected]
from pathlib import Path
import sys
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))

import gensim
import numpy as np

from chatbot.utils.log import get_logger
from chatbot.utils.wrapper import time_counter
from chatbot.config.constant import PAD, PAD_IDX, UNK, UNK_IDX
from chatbot.cparse.vocabulary import Vocabulary

logger = get_logger("Word2vec")


class Word2vecExt(object):
    def __init__(self):
        """Create the wrapper with no model attached yet."""
        # Placeholder only; this constructor neither loads nor trains a model.
        self.model = None

    def _input2sentences(self, inputs, **kwargs):
        if isinstance(inputs, str):
            path = Path(inputs).resolve()
            if path.is_dir():
                sentences = gensim.models.word2vec.PathLineSentences(
                    inputs, **kwargs)
            else:
                sentences = gensim.models.word2vec.LineSentence(
                    inputs, **kwargs)
Esempio n. 4
0
# coding:utf8
# @Time    : 18-6-6 上午9:37
# @Author  : evilpsycho
# @Mail    : [email protected]
import datetime as dt
from chatbot.preprocessing.text import cut
from chatbot.core.context import Context
from threading import Timer
from chatbot.utils.log import get_logger
from chatbot.utils.chat_record import chat_record
TIMEOUT = 60
logger = get_logger("chatbot")


class ChatBot(object):
    def __init__(self, vocab, label, intent_model, intent_rule, ner,
                 intent2skills):
        self.contexts = dict()
        self.intent2skills = intent2skills
        self.intent_model = intent_model
        self.intent_rule = intent_rule
        self.ner = ner
        skills = []
        for i, s in self.intent2skills.items():
            if s not in skills:
                skills.append(s)
        self.skills = skills
        self.vocab = vocab
        self.label = label
        self._delete_timeout_context()
Esempio n. 5
0
# @Author  : evilpsycho
# @Mail    : [email protected]
import pickle

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

from chatbot.core.serializable import Serializable
from chatbot.core.trainable import Trainable
from chatbot.core.estimator import Estimator
from chatbot.utils.path import MODEL_PATH
from chatbot.utils.log import get_logger
logger = get_logger("intent model")


class BaseIntentModel(nn.Module, Serializable, Trainable, Estimator):
    def __init__(self,
                 param: dict,
                 opt=torch.optim.Adam,
                 metric=accuracy_score,
                 loss=F.cross_entropy,
                 save_path="default"):
        super().__init__()
        self.param = param

        self.opt = None
        self.loss = loss
        self.metric = metric
Esempio n. 6
0
# -*- coding: utf-8 -*-
# @Time    : 5/12/18 13:23
# @Author  : evilpsycho
# @Mail    : [email protected]
from chatbot.utils import path
from chatbot.utils.log import get_logger

from pathlib import Path
import multiprocessing as mp

import jieba

# Number of CPUs reported by multiprocessing.cpu_count().
CPU = mp.cpu_count()
logger = get_logger("Text cut")
# Absolute path of the user dictionary file: <ROOT_PATH>/config/vocab_jieba_seg.
SEG_VOCAB_PATH = Path(path.ROOT_PATH, "config",
                      "vocab_jieba_seg").resolve().absolute()
# Both calls run at import time: first register the user dictionary with
# jieba, then initialize jieba.
# NOTE(review): presumably initialize() is called here so the dictionary load
# cost is paid at import rather than on the first cut - confirm.
jieba.load_userdict(str(SEG_VOCAB_PATH))
jieba.initialize()


def _cut2list(x):
    """Segment ``x`` with jieba and return the tokens as a list."""
    tokens = jieba.cut(x)
    return list(tokens)


def _cut2str(x):
    """Segment ``x`` with jieba and return the tokens joined by one space."""
    separator = " "
    tokens = jieba.cut(x)
    return separator.join(tokens)


# @time_counter
def cut(x, n_job=None, join=False):
    """ 分词功能,接收一个字符串,返回分词结果
Esempio n. 7
0
# coding:utf8
# @Time    : 18-6-11 下午2:41
# @Author  : evilpsycho
# @Mail    : [email protected]
import random
import codecs

from chatbot.core.skill import BaseSkill
from chatbot.utils.path import ROOT_PATH
from chatbot.utils.log import get_logger

logger = get_logger("simple skill")


def read_txt(path):
    """Read a UTF-8 text file and keep the last token of every non-blank line.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped. For the remaining lines, the text after the
    last single space (or the whole line when it has no space) is kept.

    :param path: path of the text file to read
    :return: <list> of str, one entry per non-blank line, in file order
    """
    sents = []
    with open(path, "r", encoding='UTF-8') as f:
        for line in f:
            stripped = line.strip()
            # Filter on the stripped text rather than the raw line length:
            # whitespace-only lines must not produce empty entries, and a
            # one-character last line without a trailing newline must be kept.
            if not stripped:
                continue
            sents.append(stripped.split(' ')[-1])
    return sents


class LeaveMessage(BaseSkill):
    """LeaveMessage存储及回复封装
      :param context: context
      :return: <String> 回复信息,context{user:,query} to txt
Esempio n. 8
0
# @Time    : 18-5-21 下午2:10
# @Author  : evilpsycho
# @Mail    : [email protected]
from chatbot.config.constant import UNDEFINE, UNDEFINE_IDX
from chatbot.utils.log import get_logger
from chatbot.cparse.dictionary import Dictionary
from chatbot.utils.path import ROOT_PATH


def get_intent_labels(name):
    """Read the lines of ``ROOT_PATH / "config" / name`` as intent labels.

    :param name: file name inside the ``config`` directory
    :return: list with one string per line, trailing newlines removed
    """
    label_file = str(ROOT_PATH / "config" / name)
    with open(label_file, "r") as handle:
        return [line.rstrip("\n") for line in handle]


logger = get_logger(__name__)


class IntentLabel(Dictionary):
    def __init__(self):
        """Initialise the instance through the ``Dictionary`` base constructor."""
        # TODO: init class for config intent
        super().__init__()

    def init_from_config(self, name):
        """Register every label listed in a config file.

        :param name: file name forwarded to ``get_intent_labels``
        """
        for intent_label in get_intent_labels(name):
            self._add_one(intent_label)
        # Once the configured labels are registered, turn the training flag off.
        self.training = False

    def fit(self, x):
        """
Esempio n. 9
0
# -*- coding: utf-8 -*-
# @Time    : 5/12/18 13:22
# @Author  : evilpsycho
# @Mail    : [email protected]
from gensim.corpora import WikiCorpus
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))

from chatbot.utils.log import get_logger
from chatbot.utils.wrapper import time_counter

logger = get_logger("Wiki Extract")


@time_counter
def wiki_extract(input_file, output_file):
    """wiki下载文件提取文本内容脚本

    :param input_file: 原始文件路径
    :param output_file:  提取文件路径
    :return: None
    """
    # 原始文件是否存在
    assert Path(input_file).resolve().exists()
    # 提取文件路径不存在就新建
    output_file_path = Path(output_file).resolve()
    output_file_path.parent.mkdir(exist_ok=True)
    logger.info("Start extract wiki ..")
    wiki = WikiCorpus(input_file, lemmatize=False)
    with open(output_file, "w", encoding="utf8") as f:
Esempio n. 10
0
# coding:utf8
# @Time    : 18-6-25 下午2:41
# @Author  : evilpsycho
# @Mail    : [email protected]
import sys
sys.path.append("/home/zhouzr/project/Task-Oriented-Chatbot")
from chatbot.utils.log import get_logger
logger = get_logger("test")
from chatbot.intent.models.fast_text import FastText
from chatbot.intent.rules.rule_v1 import IntentRuleV1
from chatbot.ner.rules.rule_ner import NerRuleV1
from chatbot.cparse.label import IntentLabel
from chatbot.cparse.vocabulary import Vocabulary
from chatbot.skills.simple import SayHi, Thanks, Praise, Criticize, GoodBye, LeaveMessage, CompanyInfo, BusinessInfo
from chatbot.skills.botQA import BotQA
from chatbot.skills.data_query import TestDataQuery, DataInquiry
from chatbot.skills.help import Help
from chatbot.skills.safe import SafeResponse
from chatbot.skills.tuling import Tuling
from chatbot.skills.file_retrieval import FileRetrievalExt
from chatbot.bot import ChatBot
from chatbot.utils.path import MODEL_PATH
from wxpy import Bot

# Load the persisted artefacts from the "v0.21" directory under MODEL_PATH.
# Intent classifier saved as "intent_model.FastText".
intent_model = FastText.load(
    str(MODEL_PATH / "v0.21" / "intent_model.FastText"))
# Rule-based intent and NER components are constructed without arguments.
intent_rule = IntentRuleV1()
ner = NerRuleV1()
# Label and vocabulary objects saved alongside the model.
label = IntentLabel.load(str(MODEL_PATH / "v0.21" / "label"))
vocab = Vocabulary.load(str(MODEL_PATH / "v0.21" / "vocab"))
file_retrieval = FileRetrievalExt(
Esempio n. 11
0
# coding:utf8
# @Time    : 18-6-7 上午10:51
# @Author  : evilpsycho
# @Mail    : [email protected]
from chatbot.core.entity import TimeInterval, Location, Company, Tag
from chatbot.ner.rules.company_ner import CompanyNer
from chatbot.ner.rules.time_ner import TimeNer
from chatbot.ner.rules.location_ner import LocationNer
from chatbot.ner.rules.tag_ner import TagNer
from chatbot.utils.log import get_logger

logger = get_logger("NER")


class NerRuleV1:
    def __init__(self):
        """Create one rule-based recognizer per entity kind."""
        super().__init__()
        # Each recognizer is built with its default constructor and kept on
        # the instance: company, location, time and tag.
        self.ner_company = CompanyNer()
        self.ner_loc = LocationNer()
        self.ner_time = TimeNer()
        self.ner_tag = TagNer()

    def extract(self, context):
        """
        :param context: context["query"]
        :return: <dict of list>
        {"TimeInterval": ["", ""]}

        """
        rst = {}
        ext_time = self.ner_time.extract(context)
Esempio n. 12
0
# coding:utf8
# @Time    : 18-5-15 下午4:16
# @Author  : evilpsycho
# @Mail    : [email protected]
from torch import nn
import torch
from torch.nn import functional as F

from chatbot.intent.models.base_intent_model import BaseIntentModel
from chatbot.utils.log import get_logger

logger = get_logger("TextCNN")


class TextCNN(BaseIntentModel):
    def __init__(self, param: dict):
        """Build the embedding, convolution, dropout and linear layers.

        :param param: hyper-parameter dict. This constructor reads the keys
            ``kernel_num``, ``kernel_size``, ``vocab_size``, ``embed_dim``,
            ``dropout`` and ``class_num``. ``kernel_size`` is indexed at
            positions 0, 1 and 2, so it needs at least three entries.
        """
        super().__init__(param)
        ci = 1  # input channel size
        kernel_num = param['kernel_num']  # output channel size
        kernel_size = param['kernel_size']
        vocab_size = param['vocab_size']
        embed_dim = param['embed_dim']
        dropout = param['dropout']
        class_num = param['class_num']
        self.param = param
        # NOTE(review): padding_idx=1 is hard-coded; presumably it matches the
        # project's padding index - confirm against the vocabulary constants.
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=1)
        # One convolution per kernel height; every kernel spans the full
        # embedding width (embed_dim).
        self.conv11 = nn.Conv2d(ci, kernel_num, (kernel_size[0], embed_dim))
        self.conv12 = nn.Conv2d(ci, kernel_num, (kernel_size[1], embed_dim))
        self.conv13 = nn.Conv2d(ci, kernel_num, (kernel_size[2], embed_dim))
        self.dropout = nn.Dropout(dropout)
        # NOTE(review): the input width is len(kernel_size) * kernel_num while
        # exactly three convolutions are created above; presumably their
        # outputs are concatenated before fc1 (forward is not visible here),
        # in which case the sizes agree only when kernel_size has 3 entries.
        self.fc1 = nn.Linear(len(kernel_size) * kernel_num, class_num)
Esempio n. 13
0
# -*- coding: utf-8 -*-
# @Time    : 5/12/18 14:17
# @Author  : evilpsycho
# @Mail    : [email protected]
import time
from functools import wraps

from chatbot.utils.log import get_logger

time_counter_logger = get_logger("Time Counter")


def time_counter(func):
    """Decorator that logs how long each call to ``func`` took, in minutes.

    The wrapped function's return value is passed through unchanged. If
    ``func`` raises, the exception propagates and nothing is logged.

    :param func: callable to time
    :return: wrapper with the same signature and metadata as ``func``
    """
    @wraps(func)
    def time_it(*args, **kwargs):
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed (or turn negative) when the system clock is adjusted.
        start = time.perf_counter()
        rst = func(*args, **kwargs)
        minutes = (time.perf_counter() - start) / 60
        time_counter_logger.info(
            "%s running %.2f min" % (func.__name__, minutes))
        return rst

    return time_it