import nltk

import tools  # project-local helpers: replace_punct, replace_number, START_TOKEN, END_TOKEN


def process_answords(answords):
    # Tokenize the answer string, lowercase it, and normalize punctuation and numbers.
    answords = nltk.word_tokenize(answords)
    answords_sent = ' '.join(answords).lower()
    answords_sent = tools.replace_punct(answords_sent)
    answords_sent = tools.replace_number(answords_sent)
    answords = answords_sent.split()
    return answords
def process_sentence(sent):
    # Lowercase, tokenize, and normalize punctuation and numbers in a raw sentence.
    sent = sent.lower()
    words = nltk.word_tokenize(sent)
    sent = ' '.join(words)
    sent = tools.replace_punct(sent)
    sent = tools.replace_number(sent)
    # Remove extra spaces.
    sent = ' '.join([w.strip() for w in sent.split() if len(w.strip()) > 0])
    return sent
def process_qwords(qwords):
    # Join the question word list, tokenize, lowercase, and normalize punctuation
    # and numbers, then wrap the result with start/end tokens.
    qwords = ' '.join(qwords)
    qwords = nltk.word_tokenize(qwords)
    qwords_sent = ' '.join(qwords).lower()
    qwords_sent = tools.replace_punct(qwords_sent)
    qwords_sent = tools.replace_number(qwords_sent)
    qwords = qwords_sent.split()
    qwords = ' '.join([tools.START_TOKEN] + qwords + [tools.END_TOKEN])
    return qwords
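
# Minimal usage sketch (not part of the original script). It assumes the NLTK
# tokenizer data (e.g. 'punkt') has been downloaded and that `tools` exposes
# replace_punct, replace_number, START_TOKEN, and END_TOKEN as used above.
# Note that process_qwords expects a list of words, while process_answords and
# process_sentence expect raw strings.
if __name__ == '__main__':
    print(process_sentence('How many cats are on the table?'))
    print(process_qwords(['How', 'many', 'cats', 'are', 'on', 'the', 'table', '?']))
    print(process_answords('There are 2 cats.'))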