def token_postag(text):
    """Return the nouns and adjectives found in *text*.

    Parameters
    ----------
    text : str
        Raw text to tokenize and POS-tag.

    Returns
    -------
    list[str]
        Tokens whose POS tag is exactly "NN" or "JJ", in original order.
    """
    # BUG FIX: nltk has no ``word_tokenizer``; the correct API is ``word_tokenize``.
    tokens = nltk.word_tokenize(text)
    # Rewritten as a comprehension; also avoids shadowing the ``tuple`` builtin.
    # Note: matches only the exact tags "NN"/"JJ" (not NNS/JJR/etc.), as the
    # original did.
    return [word for word, tag in nltk.pos_tag(tokens) if tag in ("NN", "JJ")]
def _ie_preprocess(document):
    """Split *document* into sentences of POS-tagged tokens.

    Parameters
    ----------
    document : str
        Raw text to preprocess.

    Returns
    -------
    list[list[tuple[str, str]]]
        One list per sentence, each containing ``(token, pos_tag)`` pairs.
    """
    sentences = nltk.sent_tokenize(document)
    # BUG FIX: ``nltk.word_tokenizer`` does not exist — use ``word_tokenize``.
    sentences = [nltk.word_tokenize(sent) for sent in sentences]
    # BUG FIX: ``nltk.os_tag`` is a typo for ``nltk.pos_tag``.
    sentences = [nltk.pos_tag(sent) for sent in sentences]
    # BUG FIX: the original computed the result but never returned it,
    # so callers always received None.
    return sentences
def Tokenization(self):
    """Tokenize each sentence in ``self.sentences``.

    Stores the result in ``self.tokens`` as a list of token lists,
    one per sentence. Returns None.
    """
    # BUG FIX: nltk has no ``word_tokenizer``; the correct name is
    # ``word_tokenize`` — the original raised AttributeError at call time.
    self.tokens = [nltk.word_tokenize(sent) for sent in self.sentences]
# Load the chatbot intents, then either reuse the cached preprocessed
# vocabulary/training data or rebuild it from the intents file.
with open("intents.json") as file:
    data = json.load(file)

try:
    # NOTE(review): "data.picle" looks like a typo for "data.pickle", but the
    # name must match whatever code writes this cache — confirm before renaming.
    with open("data.picle", "rb") as f:
        words, labels, training, output = pickle.load(f)
except (FileNotFoundError, EOFError, pickle.UnpicklingError, ValueError):
    # BUG FIX: the original bare ``except:`` swallowed *every* exception
    # (including KeyboardInterrupt); narrowed to "cache missing/unreadable".
    words = []
    labels = []
    docs_x = []  # tokenized pattern sentences
    docs_y = []  # tag corresponding to each entry of docs_x

    for intent in data["intents"]:
        # NOTE(review): key "pattern" — many intents schemas use "patterns";
        # verify against intents.json.
        for pattern in intent["pattern"]:
            # BUG FIX: ``nltk.word_tokenizer`` does not exist; use
            # ``word_tokenize``.
            wrds = nltk.word_tokenize(pattern)
            words.extend(wrds)
            docs_x.append(wrds)
            docs_y.append(intent["tag"])

        if intent["tag"] not in labels:
            # BUG FIX: the original line was missing its closing parenthesis
            # (``labels.append(intent["tag"]``) — a SyntaxError.
            labels.append(intent["tag"])

    # Normalize the vocabulary: stem, lowercase, drop "?", dedupe, sort.
    words = [stemmer.stem(w.lower()) for w in words if w != "?"]
    words = sorted(set(words))  # ``sorted`` accepts a set; no list() needed
    labels = sorted(labels)

    training = []
    output = []