Example #1
import nltk

def token_postag(text):
    """Return the nouns (NN) and adjectives (JJ) found in text."""
    text_array = []
    tokens = nltk.word_tokenize(text)
    for token, tag in nltk.pos_tag(tokens):
        # Keep only singular nouns and adjectives.
        if tag == "NN" or tag == "JJ":
            text_array.append(token)
    return text_array
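A minimal usage sketch; the sentence and the exact output are illustrative, and the "punkt" and "averaged_perceptron_tagger" NLTK resources must be downloaded first:

import nltk

nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")

print(token_postag("The quick brown fox is a clever animal"))
# Something like ['quick', 'brown', 'fox', 'clever', 'animal'];
# the exact result depends on the tagger model.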
Example #2
import nltk

def _ie_preprocess(document):
    """Split a document into sentences, tokenize them, and POS-tag each token."""
    sentences = nltk.sent_tokenize(document)
    sentences = [nltk.word_tokenize(sent) for sent in sentences]
    sentences = [nltk.pos_tag(sent) for sent in sentences]
    return sentences
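A quick usage sketch, with an illustrative document and a rough idea of the output shape:

tagged = _ie_preprocess("NLTK ships with many corpora. It also has taggers.")
# tagged is a list of sentences, each a list of (token, POS-tag) pairs, e.g.
# [[('NLTK', 'NNP'), ('ships', 'VBZ'), ...], [('It', 'PRP'), ...]]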
Example #3
def Tokenization(self):
    """
    Tokenizes each sentence in self.sentences.
    """
    self.tokens = [nltk.word_tokenize(sent) for sent in self.sentences]
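The method above clearly belongs to a larger class; a hypothetical host class (Document, assumed here only for illustration, with the method repeated so the sketch is self-contained) shows how it would be called:

import nltk

class Document:  # hypothetical container, not part of the original snippet
    def __init__(self, text):
        self.sentences = nltk.sent_tokenize(text)
        self.tokens = []

    def Tokenization(self):
        """Tokenizes each sentence in self.sentences."""
        self.tokens = [nltk.word_tokenize(sent) for sent in self.sentences]

doc = Document("NLTK is handy. It tokenizes text.")
doc.Tokenization()
print(doc.tokens)  # [['NLTK', 'is', 'handy', '.'], ['It', 'tokenizes', 'text', '.']]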
Example #4
with open("intents.json") as file:
    data = json.load(file)

try:
    with open("data.picle", "rb") as f:
        words, labels, training, output = pickle.load(f)
except:     
    words=[]
    labels=[]
    docs_x=[]
    docs_y=[]

    for intent in data["intents"]:
        for pattern in intent["pattern"]:
            wrds = nltk.word_tokenizer(pattern)
            words.extend(wrds)
            docs_x.append(wrds)
            docs_y.append(intent["tag"])

        if intent["tag"] not in labels:
            labels.append(intent["tag"]

    words=[stemmer.stem(w.lower()) for w in words if w != "?"]
    words = sorted(list(set(words)))

    labels = sorted(labels)
                          
    training = []
    output = []