Example #1
import nltk

def process_content():
    for i in tokenized:
        words = nltk.word_tokenize(i)
        tagged = nltk.pos_tag(words)

        # Chunk grammar: optional adverbs and verbs, then one or more
        # proper nouns, then an optional common noun.
        chunkgram = r"""chunk: {<RB.?>*<VB.?>*<NNP>+<NN>?}"""
        chunkParser = nltk.RegexpParser(chunkgram)
        chunked = chunkParser.parse(tagged)  # parse with the parser, not the grammar string

        chunked.draw()
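
Both of the first two examples assume a `tokenized` list of sentences that is never shown. A minimal, hypothetical setup for running them (the sample text is invented for illustration):

import nltk

# Hypothetical driver code: tokenized must be a list of sentences.
sample_text = "Mr. Smith briefly visited the White House on Monday."
tokenized = nltk.sent_tokenize(sample_text)

process_content()  # opens one chunk-tree window per sentence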
Example #2
import nltk

def process_content():
    for i in tokenized:
        words = nltk.word_tokenize(i)
        tagged = nltk.pos_tag(words)

        # Chunk everything, then chink (remove) verbs, prepositions,
        # determiners, and "to". The chink rule must sit on its own line.
        chunkgram = r"""chunk: {<.*>+}
                               }<VB.?|IN|DT|TO>+{"""
        chunkParser = nltk.RegexpParser(chunkgram)
        chunked = chunkParser.parse(tagged)

        chunked.draw()
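
The `}...{` pattern here is a chink: it chunks every tag first, then carves the listed tags back out. On a headless machine chunked.draw() cannot open a window; a small substitute for that last line, printing only the chunks (the filter label must match the "chunk" name used in the grammar):

# Drop-in replacement for chunked.draw(): print each chunk subtree.
for subtree in chunked.subtrees(filter=lambda t: t.label() == "chunk"):
    print(subtree)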
Example #3
import nltk
import numpy
from nltk.stem.lancaster import LancasterStemmer

stemmer = LancasterStemmer()  # assumed: the snippet uses an undefined stemmer

def bag_words(s, words):
    bag = [0 for _ in range(len(words))]

    s_words = nltk.word_tokenize(s)
    s_words = [stemmer.stem(word.lower()) for word in s_words]

    for se in s_words:
        for i, w in enumerate(words):
            if w == se:
                bag[i] = 1  # bag holds ints, so assign rather than append

    return numpy.array(bag)
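
A quick usage sketch, assuming the same Lancaster stemmer; the vocabulary is invented for illustration. Since the vocabulary and the sentence pass through the same stemmer, a position is 1 exactly when that vocabulary word occurs in the sentence:

vocab = sorted({stemmer.stem(w) for w in ["hello", "how", "are", "you"]})
print(bag_words("How are you?", vocab))
# prints a 0/1 array: 1 for "how", "are", "you"; 0 for "hello"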
Example #4
import json
import pickle

import nltk
from nltk.stem.lancaster import LancasterStemmer

stemmer = LancasterStemmer()  # assumed: the snippet uses an undefined stemmer

with open("cauhoi.json") as file:
    data = json.load(file)

try:
    # Reuse the preprocessed data if it has already been pickled.
    with open("data.pickle", "rb") as f:
        words, labels, training, output = pickle.load(f)
except FileNotFoundError:  # catch the missing cache, not everything
    words = []
    labels = []
    docs_x = []
    docs_y = []

    for cauhoi in data["cauhoi"]:
        for question in cauhoi["question"]:
            wrds = nltk.word_tokenize(question)
            words.extend(wrds)
            docs_x.append(wrds)
            docs_y.append(cauhoi["tag"])

        # Collect each tag once, after its questions are tokenized.
        if cauhoi["tag"] not in labels:
            labels.append(cauhoi["tag"])

    # Stem and deduplicate once, outside the loops, so every question
    # contributes to the vocabulary before it is sorted.
    words = [stemmer.stem(w.lower()) for w in words if w != "?"]
    words = sorted(list(set(words)))

    labels = sorted(labels)

    training = []
    output = []

    out_empty = [0 for _ in range(len(labels))]
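
The excerpt stops right after out_empty, leaving training and output empty and the pickle never written. A hedged sketch of the continuation this kind of preprocessing usually has (still inside the except block, and assuming numpy is imported): each question becomes a bag-of-words row plus a one-hot label row, and the result is cached for the try branch above.

    for x, doc in enumerate(docs_x):
        wrds = [stemmer.stem(w.lower()) for w in doc]

        # 1 if the vocabulary stem occurs in this question, else 0.
        bag = [1 if w in wrds else 0 for w in words]

        # One-hot row marking this question's tag.
        output_row = out_empty[:]
        output_row[labels.index(docs_y[x])] = 1

        training.append(bag)
        output.append(output_row)

    training = numpy.array(training)
    output = numpy.array(output)

    # Cache the arrays so the next run skips this whole block.
    with open("data.pickle", "wb") as f:
        pickle.dump((words, labels, training, output), f)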