    def __init__(self, paths):
        """
        Constructor for the DialogueManager

        - Loads the intent recognizer (is this about programming, or just
          chit-chatting?)
        - Loads the tf-idf vectorizer (the vectorizer trained on the dialogue
          and StackOverflow thread questions)

        Parameters
        ----------
        paths : dict
            Dictionary whose keys are resource names and whose values are
            lists of path components relative to this directory
        """
        print("Loading resources...")

        # Create the chatbot
        self.create_chitchat_bot()

        # Intent recognition:
        self.intent_recognizer = \
            unpickle_file(Path(*paths['INTENT_RECOGNIZER']))
        self.tfidf_vectorizer = \
            unpickle_file(Path(*paths['TFIDF_VECTORIZER']))

        self.ANSWER_TEMPLATE = ("I think it's about {}\n"
                                "This thread might help you: "
                                "https://stackoverflow.com/questions/{}")

        # Goal-oriented part:
        self.tag_classifier = unpickle_file(Path(*paths['TAG_CLASSIFIER']))
        self.thread_ranker = ThreadRanker(paths)
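For context, here is a minimal sketch of the paths dictionary the docstring describes; the key names used above are real, but the file names (and any extra keys ThreadRanker might need) are placeholders, not the project's actual resources.

# Hypothetical resource map: values are path components that Path(*...) above
# joins into a file path relative to this directory.
RESOURCE_PATH = {
    'INTENT_RECOGNIZER': ['resources', 'intent_recognizer.pkl'],
    'TFIDF_VECTORIZER': ['resources', 'tfidf_vectorizer.pkl'],
    'TAG_CLASSIFIER': ['resources', 'tag_classifier.pkl'],
}

dialogue_manager = DialogueManager(RESOURCE_PATH)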
    def __init__(self):
        print('Initializing corrector model...')
        # Resources used by the corrector: prefix trie, word-frequency table,
        # language model, and vocabulary.
        self.trie = unpickle_file('model/correct_datrie.bin')
        self.word_frequence = unpickle_file('data/word_frequence.bin')
        self.load_lm()
        self.vocabulary = unpickle_file('data/vocabulary.bin')
        print('Corrector model established.')
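Every snippet here leans on an unpickle_file helper that is never shown; a plausible implementation (the project's real helper may differ in details) is simply:

import pickle

def unpickle_file(filename):
    """Load and return a single pickled object from disk."""
    with open(filename, 'rb') as f:
        return pickle.load(f)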
Example #3
    def __init__(self, paths):
        # print("Loading resources...")

        # Intent recognition:
        self.intent_recognizer = unpickle_file(paths['INTENT_RECOGNIZER'])
        self.tfidf_vectorizer = unpickle_file(paths['TFIDF_VECTORIZER'])

        self.ANSWER_TEMPLATE = "I think it's about %s\nThis thread might help you: " \
                               "https://stackoverflow.com/questions/%s"

        # Goal-oriented part:
        self.tag_classifier = unpickle_file(paths['TAG_CLASSIFIER'])
        self.thread_ranker = ThreadRanker(paths)
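For illustration, the %-style template above is meant to be filled with a predicted tag and a StackOverflow thread id; both values below are made up.

ANSWER_TEMPLATE = ("I think it's about %s\n"
                   "This thread might help you: "
                   "https://stackoverflow.com/questions/%s")
print(ANSWER_TEMPLATE % ('python', 1234567))
# I think it's about python
# This thread might help you: https://stackoverflow.com/questions/1234567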
Example #4
    def __init__(self, paths=utils.RESOURCE_PATH):
        print("Loading resources...")

        # Intent recognition:
        self.intent_recognizer = utils.unpickle_file(paths['INTENT_RECOGNIZER'])
        self.tfidf_vectorizer = utils.unpickle_file(paths['TFIDF_VECTORIZER'])

        self.ANSWER_TEMPLATE = "I think it's about %s\nThis thread might help you: https://stackoverflow.com/questions/%s"

        # Goal-oriented part:
        self.tag_classifier = utils.unpickle_file(paths['TAG_CLASSIFIER'])
        self.thread_ranker = ThreadRanker(paths)

        # Chit-chat part: a ChatterBot instance trained on the English corpus.
        self.chatbot = ChatBot('tiksbot')
        trainer = ChatterBotCorpusTrainer(self.chatbot)
        trainer.train("chatterbot.corpus.english")
    def __init__(
        self,
        datapath,
    ):
        print('Initializing retrieval model...')
        self.data = pd.read_csv(datapath)
        self.tfidf = unpickle_file('model/tfidf.model')

        self.tfidf_vec = unpickle_file('data/doc_tfidf_vec.bin')
        self.inverse_idx = unpickle_file('data/inverse_idx_table.bin')

        self.word_2_id = unpickle_file('data/full_word2id.bin')
        self.id_2_word = {d: w for w, d in self.word_2_id.items()}
        self.word_2_id_for_filter = unpickle_file('data/tfidf_word2vec.bin')
        self.idf, self.avg_len = unpickle_file('data/idf_and_avglen.bin')

        self.word_vec = load_gensim_vec('data/wordvec_fasttext_300.txt')
        self.annoy = load_annoy_index('data/sentvec.ann')

        self.ltp = nlp.Ltp(seg=True,
                           pos=True,
                           seg_lexicon_path='data/lexicon_seg.txt',
                           pos_lexicon_path='data/lexicon.txt')
        self.text_precess = nlp.ProcessText()

        self.stopwords = nlp.load_stop(
            ['data/chinese_stopwords.txt', 'data/哈工大停用词表.txt'])
        self.nonsense_word = ['请问', '想知道']

        self.corrector = Corrector()
        print('Retrieval model established.')
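load_gensim_vec and load_annoy_index are project helpers that are not shown here. Assuming the index stores 300-dimensional sentence vectors under the angular metric, a sketch of the loader and a lookup with the standard annoy API could look like this:

from annoy import AnnoyIndex

def load_annoy_index(path, dim=300, metric='angular'):
    # Assumed dimension and metric; both must match how the index was built.
    index = AnnoyIndex(dim, metric)
    index.load(path)
    return index

# Candidate retrieval: the ten stored sentences nearest to a query vector.
# nearest_ids = annoy_index.get_nns_by_vector(query_vec, 10)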
    def __load_embeddings_by_tag(self, tag_name):
        """
        Returns the thread ids and thread embeddings stored for the given tag

        Parameters
        ----------
        tag_name : str
            Name of the tag

        Returns
        -------
        thread_ids : array-like, shape (n_ids,)
            Array of the thread ids associated with the tag
        thread_embeddings : np.array, shape (n_ids, embedding_dim)
            Embeddings of the threads associated with the tag
        """

        embeddings_path = self.thread_embeddings_dir.joinpath(tag_name +
                                                              '.pkl')
        thread_ids, thread_embeddings = unpickle_file(embeddings_path)

        return thread_ids, thread_embeddings
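These per-tag arrays are typically consumed by a get_best_thread-style method that embeds the incoming question and returns the id of the closest thread; a minimal sketch with scikit-learn (the function name and the cosine metric are assumptions):

from sklearn.metrics import pairwise_distances_argmin

def best_thread(question_vec, thread_ids, thread_embeddings):
    # question_vec has shape (1, embedding_dim); pick the nearest thread embedding.
    best_idx = pairwise_distances_argmin(question_vec, thread_embeddings,
                                         metric='cosine')[0]
    return thread_ids[best_idx]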
Example #7
    def __load_embeddings_by_tag(self, tag_name):
        # Load the pre-computed (thread_ids, thread_embeddings) pair for a tag.
        embeddings_path = os.path.join(self.thread_embeddings_folder, tag_name + ".pkl")
        thread_ids, thread_embeddings = unpickle_file(embeddings_path)
        return thread_ids, thread_embeddings
Example #8
    def __init__(self, paths):
        # Word embeddings, pre-computed question vectors, and the Cornell
        # dialogue pairs used by the dialogue model.
        self.embeddings, self.embeddings_dim = self._load_embeddings(
            paths['DIALOGUE_EMBEDDINGS'])
        self.question_vectors = unpickle_file(paths['QUESTION_VECTORS'])
        self.dialogues = datasets.readCornellData(paths['DIALOGUE_FOLDER'],
                                                  max_len=100)
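A constructor like this usually backs a retrieval-style bot: embed the user's utterance, find the most similar pre-computed question vector, and return the paired reply. A sketch of that selection step (the helper name and the (question, reply) ordering of each dialogue pair are assumptions, not part of the snippet):

import numpy as np

def closest_dialogue_reply(question_vec, question_vectors, dialogues):
    # Cosine similarity between the query vector and every stored question vector.
    norms = np.linalg.norm(question_vectors, axis=1) * np.linalg.norm(question_vec)
    sims = (question_vectors @ question_vec) / np.maximum(norms, 1e-12)
    best = int(np.argmax(sims))
    return dialogues[best][1]  # assumed (question, reply) ordering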