def __init__(self, paths):
    """Set up the DialogueManager.

    Loads every resource the manager needs: the chit-chat bot, the
    intent recognizer (programming question vs. chit-chat), the tf-idf
    vectorizer trained on dialogue and StackOverflow thread questions,
    the tag classifier, and the thread ranker.

    Parameters
    ----------
    paths : dict
        Keys are resource names; values are lists of path components
        relative to this directory (expanded with ``Path(*parts)``).
    """
    print("Loading resources...")

    # Chit-chat side of the bot.
    self.create_chitchat_bot()

    # Intent recognition: pickled recognizer + the vectorizer it expects.
    self.intent_recognizer = unpickle_file(
        Path(*paths['INTENT_RECOGNIZER']))
    self.tfidf_vectorizer = unpickle_file(
        Path(*paths['TFIDF_VECTORIZER']))

    # Template filled with (predicted tag, ranked thread id).
    self.ANSWER_TEMPLATE = ('I think its about {}\n'
                            'This thread might help you: '
                            'https://stackoverflow.com/questions/{}')

    # Goal-oriented part: predict a tag, then rank threads within it.
    self.tag_classifier = unpickle_file(Path(*paths['TAG_CLASSIFIER']))
    self.thread_ranker = ThreadRanker(paths)
def __init__(self):
    """Load the corrector's pickled resources and its language model.

    Pulls in the pre-built datrie, the word-frequency table, and the
    vocabulary from fixed relative paths, and delegates language-model
    loading to ``self.load_lm()``.
    """
    print('Initializing corrector model...')

    # Pre-built datrie over known words (presumably used for candidate
    # lookup — confirm against the correction routines).
    self.trie = unpickle_file('model/correct_datrie.bin')
    self.word_frequence = unpickle_file('data/word_frequence.bin')

    # The language model has its own loader.
    self.load_lm()

    self.vocabulary = unpickle_file('data/vocabulary.bin')
    print('Corrector model established.')
def __init__(self, paths):
    """Load the intent-recognition and goal-oriented resources.

    Parameters
    ----------
    paths : dict
        Maps resource names ('INTENT_RECOGNIZER', 'TFIDF_VECTORIZER',
        'TAG_CLASSIFIER', ...) to pickle file paths.
    """
    # Intent recognition: classifier + the vectorizer it was trained with.
    self.intent_recognizer = unpickle_file(paths['INTENT_RECOGNIZER'])
    self.tfidf_vectorizer = unpickle_file(paths['TFIDF_VECTORIZER'])

    # %-template filled with (predicted tag, thread id).
    self.ANSWER_TEMPLATE = (
        'I think its about %s\n This thread might help you: '
        'https://stackoverflow.com/questions/%s '
    )

    # Goal-oriented part: tag prediction + thread ranking.
    self.tag_classifier = unpickle_file(paths['TAG_CLASSIFIER'])
    self.thread_ranker = ThreadRanker(paths)
def __init__(self, paths=None):
    """Load classifiers, the thread ranker, and the chit-chat bot.

    Parameters
    ----------
    paths : dict, optional
        Maps resource names to pickle file paths. Defaults to
        ``utils.RESOURCE_PATH`` when omitted.
    """
    # Fix: the old default `paths=utils.RESOURCE_PATH` was evaluated once
    # at definition time, sharing one mutable dict across all instances
    # and freezing whatever RESOURCE_PATH was at import time. The None
    # sentinel resolves it at call time instead (backward-compatible).
    if paths is None:
        paths = utils.RESOURCE_PATH

    print("Loading resources...")

    # Intent recognition: classifier + its tf-idf vectorizer.
    self.intent_recognizer = utils.unpickle_file(paths['INTENT_RECOGNIZER'])
    self.tfidf_vectorizer = utils.unpickle_file(paths['TFIDF_VECTORIZER'])

    # TODO(review): "its" should probably read "it's", but the template
    # is user-facing output, so the text is left unchanged here.
    self.ANSWER_TEMPLATE = 'I think its about %s\nThis thread might help you: https://stackoverflow.com/questions/%s'

    # Goal-oriented part: tag prediction + thread ranking.
    self.tag_classifier = utils.unpickle_file(paths['TAG_CLASSIFIER'])
    self.thread_ranker = ThreadRanker(paths)

    # Chit-chat bot trained on the English ChatterBot corpus.
    self.chatbot = ChatBot('tiksbot')
    trainer = ChatterBotCorpusTrainer(self.chatbot)
    trainer.train("chatterbot.corpus.english")
def __init__(self, datapath):
    """Load every resource the retrieval model depends on.

    Parameters
    ----------
    datapath : str
        Path to the CSV file holding the question/answer data.
    """
    print('Initializing retrival model...')

    # Source data.
    self.data = pd.read_csv(datapath)

    # tf-idf model, per-document vectors, and the inverted index.
    self.tfidf = unpickle_file('model/tfidf.model')
    self.tfidf_vec = unpickle_file('data/doc_tfidf_vec.bin')
    self.inverse_idx = unpickle_file('data/inverse_idx_table.bin')

    # Vocabulary mappings (id_2_word is the inverse of word_2_id).
    self.word_2_id = unpickle_file('data/full_word2id.bin')
    self.id_2_word = dict(zip(self.word_2_id.values(),
                              self.word_2_id.keys()))
    self.word_2_id_for_filter = unpickle_file('data/tfidf_word2vec.bin')

    # idf table and average document length (presumably for BM25-style
    # scoring — confirm against the ranking code).
    self.idf, self.avg_len = unpickle_file('data/idf_and_avglen.bin')

    # Word vectors and the annoy index over sentence vectors.
    self.word_vec = load_gensim_vec('data/wordvec_fasttext_300.txt')
    self.annoy = load_annoy_index('data/sentvec.ann')

    # NLP pipeline: segmentation + POS tagging with custom lexicons.
    self.ltp = nlp.Ltp(seg=True,
                       pos=True,
                       seg_lexicon_path='data/lexicon_seg.txt',
                       pos_lexicon_path='data/lexicon.txt')
    self.text_precess = nlp.ProcessText()
    self.stopwords = nlp.load_stop(
        ['data/chinese_stopwords.txt', 'data/哈工大停用词表.txt'])

    # Filler phrases stripped from queries.
    self.nonsense_word = ['请问', '想知道']

    # Spelling corrector.
    self.corrector = Corrector()
    print('Retrival model established.')
def __load_embeddings_by_tag(self, tag_name):
    """Unpickle the thread ids and embeddings stored for one tag.

    Parameters
    ----------
    tag_name : str
        Name of the tag; ``<tag_name>.pkl`` is looked up under
        ``self.thread_embeddings_dir``.

    Returns
    -------
    thread_ids : array-like, shape (n_ids,)
        Ids of the threads belonging to the tag.
    thread_embeddings : np.array, shape (n_ids, embedding_dim)
        Embedding matrix for those threads.
    """
    pkl_path = self.thread_embeddings_dir / (tag_name + '.pkl')
    ids, embeddings = unpickle_file(pkl_path)
    return ids, embeddings
def __load_embeddings_by_tag(self, tag_name):
    """Return (thread_ids, thread_embeddings) unpickled for *tag_name*.

    The data is read from ``<thread_embeddings_folder>/<tag_name>.pkl``.
    """
    file_name = tag_name + ".pkl"
    pkl_path = os.path.join(self.thread_embeddings_folder, file_name)
    ids, vectors = unpickle_file(pkl_path)
    return ids, vectors
def __init__(self, paths):
    """Load embeddings, question vectors, and the dialogue corpus.

    Parameters
    ----------
    paths : dict
        Maps 'DIALOGUE_EMBEDDINGS', 'QUESTION_VECTORS', and
        'DIALOGUE_FOLDER' to their on-disk locations.
    """
    embeddings_path = paths['DIALOGUE_EMBEDDINGS']
    self.embeddings, self.embeddings_dim = \
        self._load_embeddings(embeddings_path)

    self.question_vectors = unpickle_file(paths['QUESTION_VECTORS'])

    # Cornell dialogue data; max_len=100 presumably caps utterance or
    # corpus length — confirm against datasets.readCornellData.
    self.dialogues = datasets.readCornellData(paths['DIALOGUE_FOLDER'],
                                              max_len=100)