Example #1
def translate(text):
    blob = TextBlob(text)
    transl = blob.translate(to='en')

    # resuljsondetail is a helper defined elsewhere in the source project
    resuljsondetail(blob.tags, blob.noun_phrases, blob.word_counts, blob.words,
                    blob.tokenize(), blob.sentiment_assessments, transl)
    return transl
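resuljsondetail is not shown on this page; judging by its arguments and by Examples #2 and #7 below, it most likely bundles the analysis details into a JSON response. A hypothetical sketch, assuming a Flask-style jsonify:

from flask import jsonify

def resuljsondetail(tags, noun_phrases, word_counts, words,
                    tokens, sentiment_assessments, translation):
    # Hypothetical reconstruction: serialize every TextBlob detail in one payload.
    return jsonify({
        "tags": tags,
        "noun_phrases": noun_phrases,
        "word_counts": word_counts,
        "words": words,
        "tokenize": tokens,
        "sentiment_assessments": sentiment_assessments,
        "translation": str(translation),
    })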
Example #2
def analisis(text):
    blob = TextBlob(text)
    #lang = blob.detect_language()
    transl = ''
    polarity = 0
    #sentences = blob.sentences

    #if (lang != 'en'):
    transl = blob.translate(to='en')
    enBlob = TextBlob(str(transl))
    blob = enBlob
    sentences = enBlob.sentences

    # sum the polarity of every sentence, then express it as a percentage
    for sentence in sentences:
        polarity += sentence.sentiment.polarity

    percent = round(polarity * 100)

    print(percent)

    # posneg, neg and is_hoax are helpers defined elsewhere in the source project
    result = jsonify({
        "polarity": percent,
        "positive": posneg(percent),
        "negative": neg(percent),
        "isHoax": is_hoax(percent),
        #"language":lang,
        "tags": blob.tags,
        "noun_phrases": blob.noun_phrases,
        "word_counts": blob.word_counts,
        "words": blob.words,
        "tokenize": blob.tokenize(),
        "sentiment_assessments": blob.sentiment_assessments,
        "translation": transl
    })
    return result
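posneg, neg and is_hoax are not shown on this page. Judging by how they are used, they plausibly split the polarity percentage into positive and negative shares and apply a threshold; a hypothetical sketch (the real thresholds are unknown):

def posneg(percent):
    # Hypothetical: treat a positive polarity percentage as the "positive" share.
    return percent if percent > 0 else 0

def neg(percent):
    # Hypothetical: the magnitude of a negative polarity percentage.
    return abs(percent) if percent < 0 else 0

def is_hoax(percent):
    # Hypothetical: flag clearly negative text as a potential hoax.
    return percent < 0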
Example #3
    def process_order(self, order: str, source: sr.Microphone):
        """
        Converted voice command is processed using nltk,TextBlob.vectorizer
        Command sentence is tokenized and filtered in purpose to catch "hot" words and decide
        if sentence is connected with any implemented feature.

        :param order: str
            Voice command converted to text
        :param source: speech_recognition.Microphone
            object of speech_recognition.Microphone,  which represents a physical microphone on the computer
        :return: None
        """
        sentence_to_analyze = TextBlob(order)
        self.check_sentence_polarity(sentence_to_analyze)

        order_vector = self.vectorizer.transform([order]).toarray()
        command_category = self.classifier.predict(order_vector)[0]
        if self.check_command_category(source, command_category):
            tokenized_order = sentence_to_analyze.tokenize()
            preprocess_order = [
                word for word in tokenized_order
                if word not in stopwords.words('english')
            ]
            self.commands[command_category](source, preprocess_order)
        else:
            self.convert_text_to_speech(BasicPhrases.NO_COMMEND)
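The vectorizer and classifier attributes are prepared elsewhere in the source project. A minimal sketch of how they might be trained, assuming scikit-learn's CountVectorizer and MultinomialNB and an invented set of labeled commands:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Hypothetical training data: (command sentence, command category) pairs.
training_sentences = ["what time is it", "play some music", "turn off the lights"]
training_labels = ["time", "music", "lights"]

vectorizer = CountVectorizer()
train_vectors = vectorizer.fit_transform(training_sentences).toarray()

classifier = MultinomialNB()
classifier.fit(train_vectors, training_labels)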
Example #4
def match_syntagm_text_blob_multi(syntagms, text):
    from textblob import TextBlob
    from textblob_fr import PatternTagger, PatternAnalyzer
    blob = TextBlob(text,
                    pos_tagger=PatternTagger(),
                    analyzer=PatternAnalyzer())
    # match_sequences is a helper defined elsewhere in the source project
    return match_sequences(syntagms, list(blob.tokenize()))
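match_sequences is not shown here; by its usage it presumably reports which syntagms (token sequences) occur contiguously in the token list. A hypothetical sketch:

def match_sequences(syntagms, tokens):
    # Hypothetical reconstruction: return the syntagms that appear as a
    # contiguous run of tokens in the tokenized text.
    matches = []
    for syntagm in syntagms:
        n = len(syntagm)
        for i in range(len(tokens) - n + 1):
            if tokens[i:i + n] == list(syntagm):
                matches.append(syntagm)
                break
    return matches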
Example #5
def process_keywords(input_file):
    keywords = []
    for line in input_file:
        # nltkTagger is an NLTKTagger instance defined elsewhere in the source project
        blob = TextBlob(line, pos_tagger=nltkTagger)
        kwds = blob.tokenize()
        # use noun phrases instead of raw tokens for lines longer than two tokens
        if len(kwds) > 2:
            kwds = blob.noun_phrases
        keywords.append(kwds)
    return keywords
Example #6
def process_keywords(input_file):
    keywords = []
    for line in input_file:
        # nltkTagger is an NLTKTagger instance defined elsewhere in the source project
        blob = TextBlob(line, pos_tagger=nltkTagger)
        kwds = blob.tokenize()
        # use noun phrases instead of raw tokens for lines longer than two tokens
        if len(kwds) > 2:
            kwds = blob.noun_phrases
        keywords.append(kwds)
    return keywords
Example #7
def translate(text):
    blob = TextBlob(text)
    transl = blob.translate(to='en')
    return {
        "tags": blob.tags,
        "noun_phrases": blob.noun_phrases,
        "word_counts": blob.word_counts,
        "words": blob.words,
        "tokenize": blob.tokenize(),
        "sentiment_assessments": blob.sentiment_assessments,
        "translation": transl
    }
Example #8
    def __call__(self, raw_data: str):
        """Transform raw_data into new, normalized data."""
        # correct the words
        t = TextBlob(raw_data).correct()

        # lower-case and stem every token (self.st is the stemmer)
        word_list = [self.st.stem(w.lower()) for w in t.tokenize()]

        # re-join and run a second spelling correction over the stemmed text
        s = " ".join(word_list)
        tb = TextBlob(s).correct()

        return str(tb)
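self.st is presumably an NLTK stemmer held by the surrounding class. A minimal usage sketch, assuming PorterStemmer and an invented host class name:

from textblob import TextBlob
from nltk.stem import PorterStemmer

class TextNormalizer:  # hypothetical host class for the __call__ above
    def __init__(self):
        self.st = PorterStemmer()

    def __call__(self, raw_data: str):
        t = TextBlob(raw_data).correct()
        word_list = [self.st.stem(w.lower()) for w in t.tokenize()]
        return str(TextBlob(" ".join(word_list)).correct())

print(TextNormalizer()("Thiss sentense has speling mistakes"))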
Example #9
def translate(text):
    blob = TextBlob(text)
    lang = blob.detect_language()
    transl = ''
    p = 0
    sa = []
    if lang != 'en':  # anything that is not English goes here
        transl = blob.translate(to='en')
        # polarity is a helper defined elsewhere in the source project
        pol = polarity(str(transl))
        p = pol[0]
        sa = pol[1]
    else:
        p = polarity(text)[0]
        sa = blob.sentiment_assessments

    return [
        transl, lang, blob.tags, blob.noun_phrases, blob.word_counts,
        blob.words,
        blob.tokenize(), sa, text, p
    ]
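polarity is not shown on this page; from the way its return value is indexed, it plausibly returns the polarity score followed by the sentiment assessments. A hypothetical sketch:

from textblob import TextBlob

def polarity(text):
    # Hypothetical reconstruction: index 0 is the polarity score,
    # index 1 the detailed sentiment assessments.
    blob = TextBlob(text)
    return [blob.sentiment.polarity, blob.sentiment_assessments]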
Example #10
# reconstructed by analogy with Example #13: the source page truncates this
# snippet before its setup and the enclosing part-of-speech loop
import random
from textblob import TextBlob

paragraph = "..."  # the original source text is not shown on this page
processed_paragraph = TextBlob(paragraph)
nouns = []
verbs = []
adjectives = []

for word, tag in processed_paragraph.tags:
    if tag == "NN":
        entry = {'word': word, 'synonyms': []}
        for synset in word.get_synsets(pos='n'):
            for syn in synset.lemmas():
                entry['synonyms'].append(syn.name().replace('_', ' '))

        nouns.append(entry)

    elif tag == "VB":
        entry = {'word': word, 'synonyms': []}
        for synset in word.get_synsets(pos='n'):
            for syn in synset.lemmas():
                entry['synonyms'].append(syn.name().replace('_', ' '))

        verbs.append(entry)

    elif tag == "JJ":
        entry = {'word': word, 'synonyms': []}
        for synset in word.get_synsets(pos='n'):
            for syn in synset.lemmas():
                entry['synonyms'].append(syn.name().replace('_', ' '))

        adjectives.append(entry)

for token in processed_paragraph.tokenize():

    for entry in nouns:
        if token == entry['word']:

            if len(entry['synonyms']) != 0:
                synonym = random.choice(entry['synonyms'])
                paragraph = paragraph.replace(token, synonym)

    for entry in verbs:
        if token == entry['word']:

            if len(entry['synonyms']) != 0:
                synonym = random.choice(entry['synonyms'])
                paragraph = paragraph.replace(token, synonym)
Example #11
from textblob import TextBlob

text = "What am i are you dooing here?"  # deliberately misspelled sample sentence

blob = TextBlob(text)

ans = blob.tokenize()
print(ans)
'''
t = blob.correct()
print(t)

word = blob.words
print(word)

word = blob.word_counts
print(word)

for every in blob.sentiment_assessments:
    print(every)
'''

snt = blob.sentiment.polarity
print(snt)
Example #12
def noun_phrases(text):
    blob = TextBlob(text)
    # note: despite the function's name, this returns the token list, not blob.noun_phrases
    return blob.tokenize()
Example #13
from textblob import TextBlob  # first, we need to import TextBlob, the package that will help us analyze text
import random  # used further down to pick a random replacement word

source_text = "Don't tell me the moon is shining; show me the glint of light on broken glass."  # chekhov

processed_text = TextBlob(source_text)  # in order for us to process it, we pass it to TextBlob
nouns = []  # this is the list where we will store all our nouns
verbs = []  # this one is where we will store verbs
adjectives = []  # this is where we store adjectives

print("\n======================\n")

# TOKENIZING is the process by which you separate a sentence into individual tokens (essentially words, suffixes and punctuation)
for word in processed_text.tokenize():
    print(word)

print("\n======================\n")

# PARTS OF SPEECH allows you to get the grammatical role of each word
for word, tag in processed_text.tags:
    print("word: %s || part-of-speech: %s" % (word, tag))
    if tag == "NN":  # here we are looking for nouns (NN)
        nouns.append(word)  # if we find one, we append it to our list
    elif tag == "VB":  # here we look for verbs (VB)
        verbs.append(word)
    elif tag == "JJ":  # and here for adjectives
        adjectives.append(word)

print("\n======================\n")

# SYNSETS are specific structures related to WordNet, through which you can get all the sets of related words for a given word
print(source_text)
custom_dictionary = []

for word, tag in processed_text.tags:
    if tag == 'NN':

        entry = {               # each of our entries in our dictionary
            'word': word,       # has the initial word
            'others': []        # as well as a list of other possibilities
        }

        for synset in word.get_synsets(pos="n"):
            for syn in synset.lemmas():  # here we loop through the list of lemmas related to the current noun
                entry['others'].append(syn.name().replace('_', ' '))  # replacing any '_' character with a ' ' space character as we add it to our list of other possibilities
                if syn.antonyms():
                    entry['others'].append(syn.antonyms()[0].name().replace('_', ' '))

        custom_dictionary.append(entry)  # then we add the entry to our dictionary


# this is the part where we actually replace the source text
for token in processed_text.tokenize():  # we tokenize it to make sure we get each part of the sentences
    for entry in custom_dictionary:      # then for each token, we go through our custom dictionary
        if token == entry['word']:       # if we match the word

            if len(entry['others']) != 0:               # and if we actually do have a word to replace it with!
                other = random.choice(entry['others'])  # then we pick a random alternative
                source_text = source_text.replace(token, other)  # and we replace it in the source text

print(source_text)
Example #15
"""
Created on Mon Apr  9 17:25:19 2018

@author: miaoji
"""
import nltk.tokenize as nt
from textblob import TextBlob
import time

start_time = time.time()

in_file = open("/data/zhangbin/caozhaojun/true_procress_data/daodao_en.txt", 'r')
out_file = open("handle_daodao_en.txt", 'a+')
tokenizer = nt.TweetTokenizer()

line_id = 0
for line in in_file.readlines():
    line_id += 1
    if line_id % 1000 == 0:
        print(line_id)
    # lower-case the line, drop ellipses and strip whitespace (spelling correction is disabled)
    correct_line = TextBlob(line.lower().replace('...', ' ').strip())  #.correct()
    token_line = correct_line.tokenize(tokenizer)
    final_line = ' '.join(token_line)
    out_file.write(final_line + '\n')
in_file.close()
out_file.close()

end_time = time.time()
print(float(end_time - start_time))
Example #16
# attack_blob is defined earlier in the source script, which is truncated on this
# page; a placeholder definition so the snippet runs (the original text is unknown)
from textblob import TextBlob
attack_blob = TextBlob("The attack of the giant ants was one of many attacks that day.")

# toNote: pluralize & singularize!
print(attack_blob.words.singularize())
print(attack_blob.words.pluralize())

print(attack_blob.word_counts['of'])

print(attack_blob.ngrams(n=2))
print(attack_blob.ngrams(n=4))

from textblob import Word
for word in attack_blob.words:
    print(Word(word).correct() == word)

#%% Example from https://www.analyticsvidhya.com/blog/2018/02/natural-language-processing-for-beginners-using-textblob/
av_blob = TextBlob("Analytics Vidhya is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions,etc.")
print(av_blob.tokenize())
print(av_blob.sentences, av_blob.sentences[0])

for phrase in av_blob.noun_phrases:
    print(phrase)  # analytics vidhya; great platform; data science

# toNote: part-of-speech tagging
for words, tag in av_blob.tags:
    print(words, tag)

# inflection - process of word formation in which characters are added to the base form of a word to express grammatical meanings.
# words inflection and lemmatization
print(av_blob.sentences[1].words[1].singularize())  # helps -> help

# pluralize
w = Word('Platform')
print(w.pluralize())  # the snippet is truncated here; pluralize() matches the comment above
Example #17
# Sentiment analyzer train onto movies reviews
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
blob = TextBlob("I love this library", analyzer=NaiveBayesAnalyzer())
blob.sentiment

# Tokenizer
from nltk.tokenize import TabTokenizer
tokenizer = TabTokenizer()
blob = TextBlob("This is\ta rather tabby\tblob.", tokenizer=tokenizer)
blob.tokens

# This is an alternative way: pass the tokenizer to tokenize() directly
from nltk.tokenize import BlanklineTokenizer
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
blob.tokenize(tokenizer)

# Noun phrase chunkers
from textblob.np_extractors import ConllExtractor
extractor = ConllExtractor()
blob = TextBlob("Python is a high-level programming language.", np_extractor=extractor)
blob.noun_phrases

# POS taggers
from textblob.taggers import NLTKTagger
nltk_tagger = NLTKTagger()
blob = TextBlob("Tag! You're It!", pos_tagger=nltk_tagger)
blob.pos_tags

# Parser
from textblob.parsers import PatternParser
# the example is truncated on the source page; the TextBlob docs complete it like this
blob = TextBlob("Parsing is fun.", parser=PatternParser())
blob.parse()
Example #18
def noun_phrases(text):
    blob = TextBlob(text)
    # note: despite the function's name, this returns the token list, not blob.noun_phrases
    return blob.tokenize()