def wake_jarvis_up(re_text):
    """Return 'jarvis' when the text contains a wake word ('jarvis'/'buddy'), else None."""
    tokens = wtk(re_text.lower())
    if any(token in ('jarvis', 'buddy') for token in tokens):
        return 'jarvis'
    return None
def response(self, msg, br, inte):
    """Answer a user message.

    Web keywords ('youtube', 'wikipedia', 'google') trigger the matching
    helper in `f`; anything else is classified with the loaded model and
    answered from the intents file.

    Parameters:
        msg: raw user text.
        br: path to the trained model file (passed to load_model).
        inte: path to the intents JSON file.

    Returns a response string; exits the interpreter when msg == "quit".
    """
    user = msg
    lst = wtk(user)
    # name = self.botname(bot)
    model = load_model(br)
    # close the intents file deterministically (the original leaked the handle)
    with open(inte) as intents_file:
        intents = json.loads(intents_file.read())
    if user == "quit":
        quit()
    text = user.split()
    if 'youtube' in lst:
        # BUG FIX: the original filtered with `i not in y` where y was the
        # *string* "youtube", which silently dropped every word that is a
        # substring of it (e.g. "you", "be", "t"). Compare whole words.
        vid = ' '.join(i for i in text if i != 'youtube')
        f.open(vid)
        return "result will show on your web browser"  # fixed "bowser" typo for consistency with the google branch
    if 'wikipedia' in lst:
        word = ' '.join(i for i in text if i != 'wikipedia')
        return f.short(word)
    if 'google' in lst:
        qus = ' '.join(i for i in text if i != 'google')
        f.browser_search(qus)
        return "result will show on your web browser"
    ints = self.predict_class(msg, model)
    return self.getResponse(ints, intents)
def task_performing_word(input_sent):
    """Return the task keyword for the first matching trigger token.

    Scans the tokenized sentence left to right; the first token that
    matches a trigger decides the task. Returns 'wolf' when nothing
    matches.
    """
    input_list = wtk(input_sent)
    for token in input_list:
        if token == 'book':
            return 'book'
        if token == 'recycle':
            return 'empty'
        if token == 'lock':
            return 'lock'
        if token == 'news':
            return 'news'
        if token == 'open':
            return 'open'
        if token in ('search', 'look') and 'wikipedia' in input_list:
            return 'wikipedia'
        if token in ('song', 'bored'):
            return 'song'
        # BUG FIX: guard the index — the original read input_list[1]
        # unconditionally and raised IndexError on a one-word sentence.
        if token == 'search' and len(input_list) > 1 and input_list[1] == 'youtube':
            return 'youtube'
        if token in ('search', 'look'):
            return 'browser'
    return 'wolf'
def read(self, path=''):
    """Load `path + self.file` and tokenize it.

    Sets:
        self.text: full file contents decoded as UTF-8.
        self.tokens: word tokens produced by wtk().

    A missing file is silently skipped; an unreadable file is logged.
    """
    if os.path.isfile(path + self.file):
        try:
            # TODO 'cp1252' with Crime-and-Punishment.txt
            # BUG FIX: use a context manager so the handle is always
            # closed (the original never closed the open file object).
            with open(path + self.file, 'r', encoding='utf8') as src:
                self.text = src.read()
            # \s includes [\t\n\r\f\v]
            # self.text = re.sub(pattern='\s+', repl=' ', string=self.text).strip()
            self.tokens = wtk(self.text)  # Tokenization
        except IOError:
            # BUG FIX: logging.ERROR is the numeric level constant and is
            # not callable; logging.error() is the actual logging function.
            logging.error('\t Cannot open ' + self.file)
def parse_persons(self):
    """Tag self.sentences with the NER tagger and count PERSON-name occurrences.

    Accumulates consecutive PERSON-tagged words into one candidate name,
    then — on the first non-PERSON token — splits apart common
    mis-taggings ("Tom and Jerry", trailing -ed/-ly words, conjunctions)
    before counting. Results are stored in self.persons as an OrderedDict
    keyed and sorted by name.

    NOTE(review): a name that runs to the very end of the last sentence is
    never flushed into `people` (the flush only happens on a non-PERSON
    token) — confirm whether that loss is acceptable.
    """
    people = {}  # candidate name -> occurrence count
    name = ""    # name currently being accumulated from consecutive PERSON tokens
    # contains_punctuations = False
    tokenized_sentences = [wtk(sentence) for sentence in self.sentences]
    tagged_sentences = self.tagger.tag_sents(tokenized_sentences)
    for sentence in tagged_sentences:
        for word, tag in sentence:
            # a name is made of 1 or more names, read all
            if tag == 'PERSON':
                # single-character "names" are tagger noise; skip them
                if len(word) == 1:
                    # print(word)
                    continue
                # all strange symbols: '!"”“#$%"&\'’()*+,./:;<=>?@[]^_`{|}~ʹ'
                # if word start or end with special characters, drop them
                # if word[0] in '!"”"“#$%"&\'’()*+,/:;<=>?@[]^_`{|}~ʹ':
                #     word = word[1:]
                #     contains_punctuations = True
                # if word[-1] in '!"”"“#$%"&\'’()*+,/:;<=>?@[]^_`{|}~ʹ':
                #     word = word[:-1]
                #     contains_punctuations = True
                if name == "":
                    name += word
                else:
                    name += " " + word
            else:
                # name is not empty: a PERSON run just ended — classify and count it
                if name:
                    name = name.strip()
                    current_name = name.split(" ")
                    # if len(current_name) >= 2 and contains_punctuations:
                    #     print(name)
                    # Usually and/ed/or are identified as name, e.g. Tom and Jerry
                    if len(current_name) == 3 and (current_name[1] == 'and' or current_name[1] == 'to' or \
                            current_name[1][-2:] == 'ed' or current_name[1] == 'or' or \
                            current_name[1] == 'nor'):
                        # count the two outer words as separate people
                        people[current_name[0]] = people.get(current_name[0], 0) + 1
                        people[current_name[2]] = people.get(current_name[2], 0) + 1
                    # Usually 2 words name contains adverbs or adjectives (...ly) verb (...ed), remove them
                    elif len(current_name) == 2 and ((current_name[1] in string.punctuation) or \
                            (current_name[1][-2:] == 'ed') or \
                            (current_name[1][-2:]) == 'ly' or \
                            (current_name[1].lower() in CONJUNCTIONS)):
                        people[current_name[0]] = people.get(current_name[0], 0) + 1
                    # known single-word false positives are discarded entirely
                    elif len(current_name) == 1 and current_name[0] in FALSE_POSITIVES:
                        name = ""
                    else:
                        people[name] = people.get(name, 0) + 1
                    name = ""
                    # contains_punctuations = False
    self.persons = collections.OrderedDict(sorted(people.items()))
    return
def give_me_book_name(book_sent):
    """Concatenate every token after the first 'book' token (no separators)."""
    tokens = wtk(book_sent.lower())
    if 'book' in tokens:
        # join preserves the original behavior of gluing the words
        # together with no spaces between them
        return ''.join(tokens[tokens.index('book') + 1:])
    return ''
def get_youtube_topic(query_name):
    """Return the words after the first 'for' token, each followed by a space."""
    tokens = wtk(query_name.lower().strip(' '))
    seen_for = False
    parts = []
    for token in tokens:
        if token == 'for':
            # mark the boundary; the 'for' token itself is never emitted
            seen_for = True
        elif seen_for:
            parts.append(token)
    # each collected word keeps a trailing space, as in the original
    return ''.join(part + ' ' for part in parts)
def give_me_news_name(news_sent):
    """Extract the news keyword and its companion word from a sentence.

    Returns ('category', word-before) for the first 'category' token, or
    ('from'/'on', word-after) for those keywords; falls back to ' ' when
    no keyword (with a valid companion word) is present.
    """
    news_list = wtk(news_sent.lower())
    for i, word in enumerate(news_list):
        # BUG FIX: require i > 0 — with 'category' as the first token the
        # original returned news_list[-1] (wrap-around to the last word).
        if word == 'category' and i > 0:
            return word, news_list[i - 1]
        # BUG FIX: the original indexed news_list[i + 1] unconditionally
        # and crashed with IndexError when 'from'/'on' was the last token.
        if word in ('from', 'on') and i + 1 < len(news_list):
            return word, news_list[i + 1]
    return ' '
def give_me_topic_name(user_ask):
    """Return the words between the last 'for' and the last 'on'/'in' token.

    Defaults to the whole token list when the markers are absent; returns
    '' for an empty span.
    """
    tokens = wtk(user_ask.lower())
    start, end = 0, len(tokens) - 1
    for pos, token in enumerate(tokens):
        if token == 'for':
            start = pos + 1          # topic begins after 'for'
        elif token in ('on', 'in'):
            end = pos - 1            # topic ends before 'on'/'in'
    return ' '.join(tokens[start:end + 1])
def browser_search(query):
    """Open a web search for *query* in the default browser.

    The searched text is everything after a 'for'/'about' marker when one
    is present, otherwise the query with the word 'search' removed.
    Opens `base_url + topic` via the `wb` (webbrowser) module; returns None.
    """
    search_topic = ''
    for_check = False
    query = query.lower().strip(' ')
    query_list = wtk(query)
    if ('for' in query_list) or ('about' in query_list):
        for token in query_list:
            if token in ('for', 'about'):
                for_check = True
                continue
            if for_check:
                search_topic = search_topic + token + ' '
    else:
        search_topic = query.replace('search', '')
    search_topic = search_topic.strip(' ')
    # BUG FIX: str.replace returns a new string; the original discarded
    # the result, so spaces were never URL-encoded as %20.
    search_topic = search_topic.replace(' ', '%20')
    final_url = base_url + search_topic
    wb.open(final_url)
    return
def task_performing_word(input_sent):
    """Map a sentence onto one of the bot's task keywords.

    Precedence: book > news > wikipedia > song > youtube > browser;
    'wolf' is the fallback when nothing matches.
    """
    tokens = set(wtk(input_sent))
    if 'book' in tokens:
        return 'book'
    if 'news' in tokens:
        return 'news'
    if ('search' in tokens or 'look' in tokens) and 'wikipedia' in tokens:
        return 'wikipedia'
    if 'song' in tokens or 'bored' in tokens:
        return 'song'
    if 'search' in tokens and 'youtube' in tokens:
        return 'youtube'
    if 'search' in tokens or 'look' in tokens:
        return 'browser'
    return 'wolf'
def short(user_ask):
    """Return up to the first five sentences of the Wikipedia summary.

    The topic is taken from the words between the last 'for' and the last
    'on'/'in' token of *user_ask* (the whole request when the markers are
    absent). On a DisambiguationError an apology string is returned.
    """
    # get data: extract the topic span
    user_ask = user_ask.lower()
    user_list = wtk(user_ask)
    user_ask_len = len(user_list)
    i, j = 0, (user_ask_len - 1)
    for k in range(user_ask_len):
        if user_list[k] == 'for':
            i = k + 1
        if user_list[k] == 'on' or user_list[k] == 'in':
            j = k - 1
    topic = ' '.join(user_list[i:j + 1])
    # send data: fetch and trim the summary
    summary = ''
    try:
        wiki_list = stk(wk.summary(topic))
        # BUG FIX: the original indexed wiki_list[0..4] unconditionally and
        # raised IndexError for summaries shorter than five sentences.
        summary = ''.join(wiki_list[:5])
    except wk.exceptions.DisambiguationError:
        summary = 'Sorry Can\'t retrieve data from internet.'
    return summary
import string
from nltk.tokenize import word_tokenize as wtk

# Word-frequency script: tokenize a book, strip punctuation, remove
# stopwords, and count occurrences of the remaining words in `freq`.

freq = {}

with open("C:\\Users\\khadidja\\Downloads\\word_cloud\\98-0.txt", "r", encoding="utf-8") as src:
    data = src.read()

# normalize curly quotes before stripping ASCII punctuation
# (string.punctuation only covers the ASCII set)
data = data.replace("“", " ")
data = data.replace("”", " ")
data = data.replace("’", "'")
tr = str.maketrans("", "", string.punctuation)
data1 = data.translate(tr)
data1 = data1.lower()
words = wtk(data1)

with open("C:\\Users\\khadidja\\Downloads\\word_cloud\\stopwords", "r") as stopwords:
    stpwrd = stopwords.read()
stpwrd = stpwrd.lower()
stpwrd = wtk(stpwrd)

commonwords = list(set(words) & set(stpwrd))
# PERF FIX: the original rebuilt `words` once per stopword (O(n*m));
# one pass against a set removes the same words in O(n).
stop_set = set(commonwords)
words = [w for w in words if w not in stop_set]

# tally the surviving words
for word in words:
    freq[word] = freq.get(word, 0) + 1