def initialize_document(self, doc, docs_list_mode=False):
    """Tokenize *doc* and augment the unigram tokens with bigrams and trigrams.

    Parameters
    ----------
    doc : str
        Raw document text; lowercased before tokenization.
    docs_list_mode : bool
        When False, results are stored on the instance (self.doc, self.blob,
        self.tokens, self.bigrams, self.trigrams) and None is returned.
        When True, nothing is stored and the combined token list is returned.

    Returns
    -------
    list or None
        The unigram+bigram+trigram token list in docs_list_mode, else None.
    """
    if not docs_list_mode:
        self.doc = doc.lower()
        self.blob = TextBlob(text=self.doc, tokenizer=self.tokenizer)
        self.tokens = copy.deepcopy(self.blob.tokens)
        # Both n-gram sets are derived from the original unigram tokens.
        self.bigrams = self.bigramify(self.blob.tokens)
        self.tokens.extend(self.bigrams)
        self.trigrams = self.trigramify(self.blob.tokens)
        self.tokens.extend(self.trigrams)
    else:
        doc = doc.lower()
        blob = TextBlob(text=doc, tokenizer=self.tokenizer)
        tokens = copy.deepcopy(blob.tokens)
        # BUG FIX: previously the bigrams were appended to `tokens` *before*
        # trigramify ran, so trigrams were computed over unigrams+bigrams —
        # inconsistent with the non-list branch above. Compute both n-gram
        # sets from the pristine unigram tokens first, then extend.
        bigram = self.bigramify(tokens=blob.tokens)
        trigram = self.trigramify(tokens=blob.tokens)
        tokens.extend(bigram)
        tokens.extend(trigram)
        return tokens
def test_tag_blob(self):
    """POS-tag self.text with the custom tagger and verify that the tagged
    tokens line up with the whitespace-split words of the source text."""
    tagged = TextBlob(self.text, pos_tagger=self.tagger).tags
    logging.debug("tags: {0}".format(tagged))
    expected_words = self.text.split()
    for idx, pair in enumerate(tagged):
        assert_equal(pair[0], expected_words[idx])
def sentences_sentiment():
    """Return per-sentence polarity scores for the request text as JSON."""
    blob = TextBlob(get_text(request))
    results = []
    for sent in blob.sentences:
        results.append({
            "sentence": unicode(sent),
            "sentiment": sent.sentiment[0],
        })
    return jsonify({"result": results})
def sentence_to_words(sentence):
    """Convert *sentence* into a list of lowercase words, excluding stop words.

    Parameters
    ----------
    sentence : str
        The sentence to split into words.

    Returns
    -------
    list of str
        Lowercased words with English stop words removed.
    """
    blob = TextBlob(sentence)
    # BUG FIX: the stop-word test previously compared the *original-case*
    # word against NLTK's all-lowercase stopword list, so capitalized stop
    # words ("The", "And", ...) slipped through while their lowercase form
    # was returned. Compare the lowercased word instead.
    # Also hoist the corpus load out of the loop and use a set for O(1)
    # membership tests (stopwords.words() re-reads the list on every call).
    stop_words = set(stopwords.words('english'))
    return [
        word.lower() for word in blob.words
        if word.lower() not in stop_words
    ]
def create_blob(self, request_json):
    """Build a TextBlob from *request_json*, wiring in any custom components
    the request asked for by name (analyzer, np_extractor, pos_tagger)."""
    analyzers = {'NaiveBayesAnalyzer': self.naive_bayes_analyzer}
    extractors = {'ConllExtractor': self.conll_extractor}
    taggers = {
        'NLTKTagger': self.nltk_tagger,
        'PerceptronTagger': self.perceptron_tagger,
    }

    options = {}
    requested_analyzer = request_json.get('analyzer')
    if requested_analyzer in analyzers:
        options['analyzer'] = analyzers[requested_analyzer]
    requested_extractor = request_json.get('np_extractor')
    if requested_extractor in extractors:
        options['np_extractor'] = extractors[requested_extractor]
    requested_tagger = request_json.get('pos_tagger')
    if requested_tagger in taggers:
        options['pos_tagger'] = taggers[requested_tagger]

    return TextBlob(request_json['text'], **options)
def cache_sentences(self):
    """Parse every non-filtered instance attribute into sentences and store
    them in self.cache_list, marking the instance as cached."""
    self.cached = True
    self.cache_list = []
    for attr_name, attr_value in self.__dict__.items():
        if attr_name in filterTag:
            continue
        try:
            self.cache_list.extend(TextBlob(attr_value).sentences)
        except Exception as e:
            # Best-effort: attributes that TextBlob cannot parse are skipped.
            logger.debug("textblob error| %s:%s" % (attr_name, attr_value))
def one_sentence_from(self, quote):
    """Reduce the given quote to a single sentence.

    The choice is biased against the first sentence, which is less
    likely to be the start of a real in-text sentence.
    """
    # NOTE(review): Python 2 `except ..., e` syntax below; would need
    # `except Exception as e` under Python 3.
    blob = TextBlob(quote)
    try:
        sentences = blob.sentences
    except Exception, e:
        # TextBlob can't parse this. Just return the whole string
        return quote
    # NOTE(review): the sentence-selection logic that consumes `sentences`
    # is not visible in this chunk; as shown, the success path falls through
    # and returns None — confirm against the full source.
def to_sentences(self):
    """Yield sentences either from the prebuilt cache or by parsing each
    non-filtered instance attribute on the fly."""
    if self.cached:
        for cached_sentence in self.cache_list:
            yield cached_sentence
        return
    for attr_name, attr_value in self.__dict__.items():
        if attr_name in filterTag:
            continue
        try:
            # .sentences may raise during parsing, so the whole loop stays
            # inside the try, matching the cached-list fallback behavior.
            for parsed_sentence in TextBlob(attr_value).sentences:
                yield parsed_sentence
        except Exception as e:
            logger.debug("textblob error| %s:%s" % (attr_name, attr_value))
def truncate_at_stopword(self, string):
    # Truncate a string at the last stopword not preceded by
    # another stopword.
    # print "%s =>" % string
    if type(string) == Sentence:
        words = string.words
    else:
        try:
            # NOTE(review): this assigns TextBlob(...).sentences to a
            # variable named `words`, while the Sentence branch above uses
            # `.words` — this looks like it should be `.words`; confirm
            # against the full source before changing.
            words = TextBlob(string).sentences
        except Exception, e:
            # TextBlob can't parse this. Just return the whole string
            return string
    # NOTE(review): the truncation logic that consumes `words` is not
    # visible in this chunk — the snippet appears truncated here.
def freq(self, word, docs=None):
    """Count occurrences of *word* (a unigram, bigram, or trigram string).

    Parameters
    ----------
    word : str
        Token to count.
    docs : None, str, or iterable of str
        When None, count within this instance's own token list. Otherwise
        tokenize *docs* (joining an iterable into one string first) and
        count within that ad-hoc document.

    Returns
    -------
    int
        Number of occurrences of *word*.
    """
    if docs is None:
        return self.tokens.count(word)
    if not isinstance(docs, str):
        # Join an iterable of fragments into one document string.
        # (Replaces the previous quadratic "%s %s" accumulation loop.)
        docs = " ".join("%s" % item for item in docs)
    blob = TextBlob(text=docs, tokenizer=self.tokenizer)
    # BUG FIX: bigramify/trigramify were previously handed the TextBlob
    # object itself rather than its token list (elsewhere in this class they
    # receive tokens), and trigrams were computed after the bigrams had
    # already been appended. Compute both n-gram sets from the pristine
    # unigram tokens, then extend.
    bigrams = self.bigramify(blob.tokens)
    trigrams = self.trigramify(blob.tokens)
    blob.tokens.extend(bigrams)
    blob.tokens.extend(trigrams)
    return blob.tokens.count(word)
def __init__(self):
    """Instantiate the custom NLP components and, outside of DEV_ENV,
    warm them up so first-request latency is paid at startup."""
    self.naive_bayes_analyzer = NaiveBayesAnalyzer()
    self.conll_extractor = ConllExtractor()
    self.nltk_tagger = NLTKTagger()
    self.perceptron_tagger = PerceptronTagger()
    if DEV_ENV:
        return
    # Touch each lazy property once so the underlying models are
    # loaded/trained now rather than on the first real request.
    text = 'TextBlob blobs great!'
    warmup_blobs = (
        TextBlob(text),
        TextBlob(text,
                 analyzer=self.naive_bayes_analyzer,
                 np_extractor=self.conll_extractor,
                 pos_tagger=self.nltk_tagger),
    )
    for blob in warmup_blobs:
        blob.sentiment
        blob.noun_phrases
        blob.pos_tags
    TextBlob(text, pos_tagger=self.perceptron_tagger).pos_tags
def rate(cls, s, base_score=1.0, frequencies=None, obscurity_cutoff=None):
    "Rate a string's suitability as an _ebook quote."
    # NOTE(review): Python 2 code (`s.decode("utf8")`, `except ..., e`).
    # The `frequencies` and `obscurity_cutoff` parameters are not used in
    # the portion visible here.
    s = s.strip()
    score = float(base_score)
    # print s
    # print " Starting rating: %.2f" % score
    # People like very short or very long quotes.
    # if len(s) < 40:
    #     score *= 2
    if len(s) > 128:
        score *= 2
    # print " Length bonus: %.2f" % score
    blob = TextBlob(s.decode("utf8"))
    try:
        words = blob.words
    except Exception, e:
        # TODO: I'm sick of trying to get TextBlob to parse
        # strings that include things like ". . . ". Just return
        # the current score.
        return score
    # NOTE(review): the scoring logic that consumes `words` is not visible
    # in this chunk; as shown the success path falls through and returns
    # None — confirm against the full source.
def quotes_in(self, paragraph):
    """Generate candidate _ebook quotes from *paragraph*.

    The paragraph is wrapped to self.wrap_at columns and scanned line by
    line. With some probability a quote is started (possibly backtracking a
    few lines on a keyword match), grown line by line, then randomly
    truncated/cleaned and yielded if it falls within the configured size
    bounds. Capitalized proper-noun-light phrases may also be yielded
    directly.

    Yields
    ------
    unicode
        Candidate quotes.
    """
    para = textwrap.wrap(paragraph, self.wrap_at)
    if len(para) == 0:
        return
    probability = self.probability
    if para[0][0].upper() == para[0][0]:
        # We greatly prefer lines that start with capital letters.
        probability *= 5
    else:
        probability /= 4
    gathering = False
    in_progress = None
    last_yield = None
    for i in range(len(para)):
        line = para[i]
        if gathering:
            # We are currently putting together a quote.
            done = False
            if (random.random() < self.truncate_chance
                    and len(in_progress) >= self.minimum_quote_size):
                # Yield a truncated quote.
                done = True
            else:
                potential = in_progress + ' ' + line.strip()
                if len(potential) >= self.maximum_quote_size:
                    # That would be too long. We're done.
                    done = True
                else:
                    in_progress = potential
            if done:
                quote = in_progress
                in_progress = None
                gathering = done = False
                # Miscellaneous tweaks to increase the chance that
                # the quote will be funny.
                if random.random() < 0.6:
                    quote = self.one_sentence_from(quote)
                if random.random() < 0.4:
                    quote = self.truncate_at_stopword(quote)
                # Quotes that end with two consecutive stopwords
                # are not funny. It would be best to parse every
                # single quote and make sure it doesn't end with
                # two consecutive stopwords. But in practice it's
                # much faster to just check for the biggest
                # offenders, which all end in 'the', and then trim
                # the 'the'.
                low = quote.lower()
                for ending in ('of the', 'in the', 'and the',
                               'on the', 'for the'):
                    if low.endswith(ending):
                        # BUG FIX: this used to be quote[:len(" the")-1],
                        # i.e. quote[:3], which truncated the quote to its
                        # first three characters. Trim the trailing " the"
                        # instead. (Also removed a duplicate 'in the' from
                        # the tuple above.)
                        quote = quote[:-len(" the")]
                        break
                quote = unicode(quote)
                quote = self.remove_ending_punctuation(quote)
                quote = self.remove_beginning_punctuation(quote)
                if random.random() > 0.75:
                    quote = self.truncate_to_common_word(quote)
                if (len(quote) >= self.minimum_quote_size
                        and len(quote) <= self.maximum_quote_size
                        and self.ONE_LETTER.search(quote)):
                    yield quote
                    last_yield = quote
                continue
        else:
            # We are not currently gathering a quote. Should we be?
            # NOTE(review): `r` is drawn but never used — the test below
            # draws a fresh random number. Possibly `r < probability` was
            # intended; kept as-is to preserve behavior.
            r = random.random()
            if random.random() < probability:
                # Run the regular expression and see if it matches.
                m = self.SEVERAL_CAPITALIZED_WORDS.search(line)
                if m is not None:
                    phrase = m.groups()[0]
                    if "Gutenberg" in phrase or "Proofreader" in phrase:
                        # Part of the meta, not part of text.
                        continue
                    # Tag the text to see if it's a proper noun.
                    blob = TextBlob(phrase)
                    tags = blob.tags
                    proper_nouns = [x for x, tag in tags
                                    if tag.startswith('NNP')]
                    if len(proper_nouns) < len(tags) / 3.0:
                        # We're good.
                        yield phrase
                        continue
            matches = self._line_matches(line)
            if matches or random.random() < probability:
                gathering = True
                if matches:
                    # A keyword match! Start gathering a quote either
                    # at this line or some earlier line.
                    maximum_backtrack = (
                        self.maximum_quote_size / self.wrap_at) - 1
                    backtrack = random.randint(0, maximum_backtrack)
                    start_at = max(0, i - backtrack)
                    in_progress = " ".join(
                        [x.strip() for x in para[start_at:i+1]])
                else:
                    in_progress = line.strip()
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'),
         ('He is my sworn enemy!', 'neg'),
         ('My boss is horrible.', 'neg')]
# Held-out labeled examples used only to measure classifier accuracy.
test = [('The beer was good.', 'pos'),
        ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'),
        ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]
# Train a Naive Bayes classifier on the labeled `train` list
# (opened before this chunk).
cl = NaiveBayesClassifier(train)
# Classify some text
print(cl.classify("Their burgers are amazing."))  # "pos"
print(cl.classify("I don't like their pizza."))   # "neg"
# Classify a TextBlob
blob = TextBlob(
    "The beer was amazing. But the hangover was horrible. "
    "My boss was not pleased.",
    classifier=cl)
print(blob)
print(blob.classify())
# Each individual sentence can also be classified via the blob's classifier.
for sentence in blob.sentences:
    print(sentence)
    print(sentence.classify())
# Compute accuracy
print("Accuracy: {0}".format(cl.accuracy(test)))
# Show 5 most informative features
cl.show_informative_features(5)
def sentiment():
    """Return the polarity score of the request text as JSON."""
    blob = TextBlob(get_text(request))
    polarity = blob.sentiment[0]  # index 0 of the sentiment tuple is polarity
    return jsonify({"result": polarity})
def noun_phrases():
    """Return the unique noun phrases of the request text as JSON,
    punctuation-stripped and limited to phrases of at most five words."""
    unique_phrases = set(TextBlob(get_text(request)).noun_phrases)
    stripped = []
    for phrase in unique_phrases:
        # Exclude long phrases, strip punctuation from the ends of the rest.
        if len(phrase.split()) <= 5:
            stripped.append(strip_punc(phrase))
    return jsonify({"result": stripped})
def test_blob_analyze(self):
    """With the custom analyzer, positive text scores above zero polarity
    and negative text scores below zero."""
    for sample, expect_positive in ((self.pos, True), (self.neg, False)):
        polarity = TextBlob(sample, analyzer=self.analyzer).sentiment[0]
        if expect_positive:
            assert_true(polarity > 0.0)
        else:
            assert_true(polarity < 0.0)
# writer.writerow(columns) global_row = 0 # xlsxwriter doesn't have a writerow(function), so we have to keep track of what row we're on columns = ['unit', 'id_article', 'position (+1)', 'unit_content', 'adjectives', 'verbs', 'article_title', 'article_content_no_tags', 'article_url'] workbook = xlsxwriter.Workbook(publication_prefix+'full_report.xlsx') # Create new spreadsheet worksheet = workbook.add_worksheet() # Make new worksheet for col in range(0, len(columns)): worksheet.write(global_row, col, columns[col]) global_row += 1 for row in ngo_mentions: # Loop through all rows in the database results # Use TextBlob to parse the article # blob.tags returns the following parts of speech (some are missing, like VBN, etc.): # noun (NN), adjective (JJ), determiner (DT), verb (VB), noun phrase (NP), # sentence subject (SBJ), and prepositional noun phrase (PNP) blob = TextBlob(row['article_content_no_tags']) # Split the article into paragraphs paragraphs = (re.split('(\n)+', row['article_content_no_tags'])) paragraphs = [paragraph for paragraph in paragraphs if paragraph != "\n"] paragraphs_lower = [paragraph.lower() for paragraph in paragraphs] # Add line numbers # enumerate(list, 1) results in (list1, 1), (list2, 2), etc. article_numbered = ['(' + str(paragraph[0]) + ') ' + paragraph[1] for paragraph in enumerate(paragraphs, 1)] csv_article = '\n'.join(article_numbered) # Get a list of all the paragraphs that mention one of the organizations paragraph_position = [i for i, x in enumerate(paragraphs_lower) if any(org.lower() in x for org in organizations)] # Split the article into sentences
def content_to_sentences(text):
    """Split *text* into sentences and return them as plain strings."""
    blob = TextBlob(text)
    sentences = []
    for sentence in blob.sentences:
        sentences.append(str(sentence))
    return sentences
# -*- coding: utf-8 -*- """ Created on Fri Oct 4 09:44:50 2013 @author: ozdemircili """ from text.blob import TextBlob text = TextBlob( "Once upon a time a there was a program called Pycheat.It was one of the cheats" ) text.tags text.noun_phrases text.sentiment text.words text.sentences text.title text.words[-1].singularize() text.words[3].pluralize() from text.blob import Word from text.blob import Verb
def text(self, tweetObject):
    """Print a greeting followed by the sentiment of *tweetObject*."""
    analysis_blob = TextBlob(tweetObject)
    print("HELLO")
    print(analysis_blob.sentiment)