Code example #1
import numpy as np

def jaccard_sim(sentence_1, sentence_2):
    """
    Compute IoU (Jaccard) measure on string tokens
    """
    # normalizeString and target come from the surrounding project code
    sentence_1 = normalizeString(sentence_1, target)
    tokens_1 = sentence_1.split()
    sentence_2 = normalizeString(sentence_2, target)
    tokens_2 = sentence_2.split()

    # list.extend() returns None, so concatenate the token lists instead
    union = np.unique(tokens_1 + tokens_2)
    intersection = [token for token in union if token in tokens_1 and token in tokens_2]
    return len(intersection) / len(union)
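A minimal, self-contained sketch of the same Jaccard/IoU idea, using Python sets and plain whitespace tokenization in place of the project's normalizeString helper (the function name and example sentences are made up for illustration):

def jaccard_sim_sets(sentence_1, sentence_2):
    # lowercase + whitespace split stands in for normalizeString here
    tokens_1 = set(sentence_1.lower().split())
    tokens_2 = set(sentence_2.lower().split())
    if not (tokens_1 or tokens_2):
        return 0.0
    # |intersection| / |union| over the two token sets
    return len(tokens_1 & tokens_2) / len(tokens_1 | tokens_2)

print(jaccard_sim_sets("how are you", "how are they"))  # 2 shared / 4 unique tokens -> 0.5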
Code example #2
File: retrieve.py  Project: YasminaKerkeb/Chatbot-ECL
def answer_question(question, s2v_model, w2v_model, data, similarity, k=1):
    """
    Uses vector representation and similarity function 
    to provide K possible answers to the question
    """
    # Clean question
    question = normalizeString(question)
    embed_matrix = s2v_model.get_embedding_matrix()
    query_vec = s2v_model.seq_vec_sent(question)
    result = ''

    if query_vec.shape[0] == embed_matrix.shape[0]:

        # Compute similarity
        X = similarity(query_vec, embed_matrix)[0]

        # Get indexes sorted by similarity (best matches last)
        indexes = np.argsort(X)

        # Extract responses from data
        Y = data.iloc[indexes, 1].drop_duplicates(keep='last')
        Y_indexes = list(Y.index)
        responses = Y.to_numpy()[-k:]

        for i, rep in enumerate(responses):
            result += "Similarity : {} - {}\n".format(
                float(X[Y_indexes[-k + i]]), rep)

    else:
        result += 'seqvec embedding dimensions {} \n'\
                  'model embedding dimensions {}'.format(query_vec.shape, embed_matrix.shape)

    return result
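For reference, a rough, self-contained sketch of the same retrieve-by-similarity pattern (cosine similarity against an embedding matrix, argsort, then take the top-k rows); the cosine_similarity helper, toy vectors, and candidate answers are assumptions made for illustration, not the project's actual s2v_model:

import numpy as np

def cosine_similarity(query_vec, embed_matrix):
    # embed_matrix is assumed to hold one column per stored sentence
    dots = query_vec @ embed_matrix
    norms = np.linalg.norm(query_vec) * np.linalg.norm(embed_matrix, axis=0)
    return dots / np.maximum(norms, 1e-12)

answers = np.array(["Hello!", "The library opens at 9am.", "Goodbye."])
embed_matrix = np.random.rand(40, 3)      # toy 40-dim embeddings for 3 stored sentences
query_vec = embed_matrix[:, 1] + 0.01     # a query close to the second sentence

scores = cosine_similarity(query_vec, embed_matrix)
top_k = np.argsort(scores)[-2:]           # indexes of the 2 best matches, best one last
for i in top_k:
    print("Similarity : {} - {}".format(scores[i], answers[i]))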
Code example #3
File: sent2vec.py  Project: YasminaKerkeb/Chatbot-ECL
    def tf_idf_vector(self, sentence, emb_size=40):
        """
        Create vectors using tf_idf score
        """
        vec = np.zeros(emb_size)
        sentence = normalizeString(sentence)
        tokens = sentence.split()

        # Only the first emb_size tokens fit in the fixed-size vector
        for i, token in enumerate(tokens[:emb_size]):
            if token in self.tf_dict:
                vec[i] = self.tf_dict[token]
        
        return vec
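The method assumes a precomputed self.tf_dict mapping tokens to term-frequency scores. A rough, self-contained sketch of how such a dictionary could be built (the normalization by total token count is an assumption, not necessarily the project's scheme):

from collections import Counter

def build_tf_dict(corpus):
    # term frequency: count of each token divided by the total number of tokens
    tokens = [t for sentence in corpus for t in sentence.lower().split()]
    counts = Counter(tokens)
    total = len(tokens)
    return {token: count / total for token, count in counts.items()}

tf_dict = build_tf_dict(["how are you", "where is the library", "how old are you"])
print(tf_dict["how"])  # "how" appears 2 times out of 11 tokens -> ~0.18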
Code example #4
File: sent2vec.py  Project: YasminaKerkeb/Chatbot-ECL
    def bow(self, sentence):
        """
        Create vectors using bag of words created from the list of words in dataset
        """
        vec = np.zeros(self.n_words)
        sentence = normalizeString(sentence)
        tokens = sentence.split()

        for token in tokens:
            if token in self.words:
                # mark the slot at the token's vocabulary index, not its position in the sentence
                vec[self.words.index(token)] = 1
        
        return vec
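For comparison, the same bag-of-words encoding can be sketched in isolation over a small, made-up vocabulary (the words list below is an assumption for illustration):

import numpy as np

words = ["hello", "how", "are", "you", "bye"]   # toy vocabulary

def bow_vector(sentence, words):
    vec = np.zeros(len(words))
    for token in sentence.lower().split():
        if token in words:
            # set the slot that corresponds to the token's vocabulary index
            vec[words.index(token)] = 1
    return vec

print(bow_vector("How are you", words))  # -> [0. 1. 1. 1. 0.]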
Code example #5
    def reply(self, input_text):
        with torch.no_grad():
            # split on end-of-sentence punctuation; the final piece is empty when the
            # input ends with punctuation, so drop it
            sentences = [s.strip() for s in re.split(r'[.,?!]', input_text)]
            sentences = sentences[:-1]
            if sentences == []:
                sentences = [input_text]
            for sentence in sentences:
                trimmed_sentence = TrimWordsSentence(normalizeString(sentence))
                print(trimmed_sentence)
                answer_words, _ = self.model(trimmed_sentence,
                                             self.train_input_lang,
                                             self.train_output_lang)
                answer = ' '.join(answer_words)

        return answer
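The splitting logic in reply relies on re.split leaving a trailing empty piece when the input ends with punctuation; a small standalone check of that behavior (independent of the chatbot model, with a made-up input string):

import re

text = "Hello! How are you?"
pieces = [s.strip() for s in re.split(r'[.,?!]', text)]
print(pieces)       # ['Hello', 'How are you', ''] -- note the trailing empty piece
print(pieces[:-1])  # ['Hello', 'How are you']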
Code example #6
File: sent2vec.py  Project: YasminaKerkeb/Chatbot-ECL
    def w_seq2vec_fun(self, sentence):
        """
        Computes sequence vector using input model
        """
        sentence = normalizeString(sentence)
        tokens = sentence.split()
        len_tokens = len(tokens)
        seq_vec = np.zeros_like(self.model[:, 0], dtype=float)

        for i in range(len_tokens):
            if tokens[i] in self.words:
                index = self.words.index(tokens[i])
                seq_vec += self.tf_dict[tokens[i]] * self.model[:, index]
        
        return seq_vec / len_tokens
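The method above is essentially a term-frequency-weighted average of word-embedding columns. A self-contained sketch of the same idea with toy data (the vocabulary, tf_dict values, and random embedding matrix are made up; the one-column-per-word layout mirrors self.model):

import numpy as np

words = ["how", "are", "you"]             # toy vocabulary
model = np.random.rand(40, len(words))    # toy embeddings: one 40-dim column per word
tf_dict = {"how": 0.2, "are": 0.3, "you": 0.5}

def weighted_seq_vec(sentence):
    tokens = sentence.lower().split()
    seq_vec = np.zeros(model.shape[0])
    for token in tokens:
        if token in words:
            # accumulate the tf-weighted embedding column for this token
            seq_vec += tf_dict[token] * model[:, words.index(token)]
    return seq_vec / max(len(tokens), 1)

print(weighted_seq_vec("how are you").shape)  # (40,)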