Example #1

rateAlpha = alpha(rating, mini, middle, maxn, rateAlpha, length)

# Review titles and bodies (columns 12 and 13 of the data array).
titles = np.array(npdata[:, 12])
contents = np.array(npdata[:, 13])

titlePositive = np.empty(length, dtype=float)
contentPositive = np.empty(length, dtype=float)

titleSubjectivity = np.empty(length, dtype=float)
contentSubjectivity = np.empty(length, dtype=float)

for i in range(0, length):
    temp = textblob.TextBlob(str(titles[i]))
    titlePositive[i] = temp.sentiment.polarity
    titleSubjectivity[i] = temp.sentiment.subjectivity
    temp = textblob.TextBlob(str(contents[i]))
    contentPositive[i] = temp.sentiment.polarity
    contentSubjectivity[i] = temp.sentiment.subjectivity


# Function for calculating the score of reviews.
def markReviews(pos, sub, score, length, x, y):
    for i in range(0, length):
        if pos[i] >= 0:
            score[i] = pow(x, pos[i]) / (sub[i] + y)
        else:
            score[i] = pos[i] * pow(x, pos[i]) / (abs(pos[i]) * y *
                                                  (sub[i] + 1))
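
# A sketch of how the arrays computed above could be fed through markReviews().
# The exponent base `x` and subjectivity offset `y` are assumed tuning constants
# here, not values taken from the original script.
x, y = 2.0, 0.5

titleScore = np.empty(length, dtype=float)
contentScore = np.empty(length, dtype=float)

markReviews(titlePositive, titleSubjectivity, titleScore, length, x, y)
markReviews(contentPositive, contentSubjectivity, contentScore, length, x, y)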
Example #2
 def test_np_extractor_is_shared_among_instances(self):
     blob1 = tb.TextBlob("This is one sentence")
     blob2 = tb.TextBlob("This is another sentence")
     assert_true(blob1.np_extractor is blob2.np_extractor)
Example #3
 def test_can_use_different_sentanalyzer(self):
     blob = tb.TextBlob("I love this car", analyzer=NaiveBayesAnalyzer())
     assert_true(isinstance(blob.analyzer, NaiveBayesAnalyzer))
Example #4
 def test_words_includes_apostrophes_in_contractions(self):
     blob = tb.TextBlob("Let's test this.")
     assert_equal(blob.words, tb.WordList(['Let', "'s", "test", "this"]))
     blob2 = tb.TextBlob("I can't believe it's not butter.")
     assert_equal(blob2.words, tb.WordList(['I', 'ca', "n't", "believe",
                                         'it', "'s", "not", "butter"]))
Example #5
 def test_pos_tags_includes_one_letter_articles(self):
     blob = tb.TextBlob("This is a sentence.")
     assert_equal(blob.pos_tags[2][0], 'a')
Example #6
 def test_sentiment_of_foreign_text(self):
     blob = tb.TextBlob(u'Nous avons cherch\xe9 un motel dans la r\xe9gion de '
         'Madison, mais les motels ne sont pas nombreux et nous avons '
         'finalement choisi un Motel 6, attir\xe9s par le bas '
         'prix de la chambre.')
     assert_true(isinstance(blob.sentiment[0], float))
Example #7
 def test_len(self):
     blob = tb.TextBlob('lorem ipsum')
     assert_equal(len(blob), len('lorem ipsum'))
Example #8
def parse_graf(doc_id, graf_text, base_idx):
    """CORE ALGORITHM: parse and markup sentences in the given paragraph"""

    global DEBUG
    global POS_KEEPS, POS_LEMMA, TAGGER

    markup = []
    new_base_idx = base_idx

    for sent in textblob.TextBlob(graf_text).sentences:
        graf = []
        digest = hashlib.sha1()

        tagged_sent = TAGGER.tag(str(sent))
        tag_idx = 0
        raw_idx = 0

        if DEBUG:
            print(tagged_sent)

        while tag_idx < len(tagged_sent):
            pos_tag = tagged_sent[tag_idx]
            word = WordNode(word_id=0,
                            raw=pos_tag[0],
                            root=pos_tag[0],
                            pos=pos_tag[1],
                            keep=0,
                            idx=new_base_idx)

            if DEBUG:
                print("IDX", tag_idx, pos_tag)
                print("reg", is_not_word(pos_tag[0]))
                print("   ", raw_idx, len(sent.words), sent.words)
                print(graf)

            if is_not_word(pos_tag[0]) or (pos_tag[1] == "SYM"):
                parsed_raw = pos_tag[0]
                pos_family = '.'
                word = word._replace(pos=pos_family)
            elif raw_idx < len(sent.words):
                parsed_raw = sent.words[raw_idx]
                pos_family = pos_tag[1].lower()[0]
                raw_idx += 1

            word = word._replace(raw=str(parsed_raw))

            if pos_family in POS_LEMMA:
                word = word._replace(root=str(
                    parsed_raw.singularize().lemmatize(pos_family)).lower())
            elif pos_family != '.':
                word = word._replace(root=str(parsed_raw).lower())
            else:
                word = word._replace(root=str(parsed_raw))

            if pos_family in POS_KEEPS:
                word = word._replace(word_id=get_word_id(word.root), keep=1)

            digest.update(word.root.encode("utf-8"))

            # schema: word_id, raw, root, pos, keep, idx
            if DEBUG:
                print(word)

            graf.append(list(word))

            new_base_idx += 1
            tag_idx += 1

        markup.append(ParsedGraf(id=doc_id, sha1=digest.hexdigest(),
                                 graf=graf))

    return markup, new_base_idx
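
# parse_graf() leans on names defined elsewhere in its module (WordNode, ParsedGraf,
# TAGGER, POS_KEEPS, POS_LEMMA, get_word_id, is_not_word). A minimal sketch of what
# those supporting pieces could look like, inferred from how the function uses them;
# the original project's definitions may differ.
import re
from collections import namedtuple

from textblob.taggers import NLTKTagger

# schema matches the "word_id, raw, root, pos, keep, idx" comment above
WordNode = namedtuple('WordNode', 'word_id raw root pos keep idx')
ParsedGraf = namedtuple('ParsedGraf', 'id sha1 graf')

DEBUG = False
POS_KEEPS = ['v', 'n', 'j']   # assumed: keep verbs, nouns, adjectives
POS_LEMMA = ['v', 'n']        # assumed: lemmatize verbs and nouns
TAGGER = NLTKTagger()         # any tagger object with a .tag(text) method works

_WORD_IDS = {}

def is_not_word(token):
    # treat tokens with no alphanumeric characters as punctuation
    return re.search(r'[A-Za-z0-9]', token) is None

def get_word_id(root):
    # hypothetical stand-in: a stable integer id per unique root form
    return _WORD_IDS.setdefault(root, len(_WORD_IDS) + 1)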
Example #9
             time.sleep(timeToSleep + 1)
         else:
             time.sleep(1)

 if i % 100 == 0:
     print("on id number: ", i)
     sys.stdout.flush()
 i = i + 1
 counter = counter + 1
 try:
     for status in tweepy.Cursor(api.user_timeline, user_id = idno, since_id = carsonDropsOutTweetId).items(20):
         statusText = status.text.lower()
         for person in personSentimentList:
             for keyword in person[2]:
                 if keyword in statusText:
                     tb = textblob.TextBlob(statusText)
                     person[1].append((tb.sentiment.polarity, status.id))
                     mentionFlag = True
                     break
         for issue in issueSentimentList:
             for keyword in issue[2]:
                 if keyword in statusText:
                     tb = textblob.TextBlob(statusText)
                     issue[1].append((tb.sentiment.polarity, status.id))
                     mentionFlag = True
                     break
 except KeyboardInterrupt:
     raise
 except:
     print(sys.exc_info()[0])
     sys.stdout.flush()
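
# The two lists iterated above appear to hold [label, collected_scores, keyword_list]
# entries, judging by how person[1] and person[2] are used. A hypothetical example of
# how they might be initialized:
personSentimentList = [
    ["trump", [], ["trump", "donald"]],
    ["carson", [], ["carson", "ben carson"]],
]
issueSentimentList = [
    ["immigration", [], ["immigration", "border"]],
    ["economy", [], ["economy", "jobs", "taxes"]],
]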
Example #10
import tweepy
import textblob

import os
from dotenv import load_dotenv
load_dotenv()

consumer_key = os.getenv("consumer_key")
consumer_secret = os.getenv("consumer_secret")

access_token = os.getenv("access_token")
access_secret = os.getenv("access_secret")

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

api = tweepy.API(auth)

searched = input()

public_tweets = api.search(searched)

for tweet in public_tweets:
    print(tweet.text)
    analysis = textblob.TextBlob(tweet.text)
    print(analysis.sentiment)
    print('')
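
# Note: this snippet uses the tweepy v3 API. With tweepy 4.x the endpoint was renamed,
# so the equivalent call (assuming v4) would be:
#   public_tweets = api.search_tweets(q=searched)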
Example #11
 def __init__(self, *args, **kwargs):
     models.Model.__init__(self, *args, **kwargs)
     self.blob = textblob.TextBlob(self.content)
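
# This __init__ presumably sits on a Django model with a text field named `content`
# (inferred from self.content). A minimal, hypothetical model showing the same idea:
import textblob
from django.db import models

class Review(models.Model):
    content = models.TextField()

    def __init__(self, *args, **kwargs):
        models.Model.__init__(self, *args, **kwargs)
        # cache a TextBlob wrapper so sentiment and noun phrases are one attribute away
        self.blob = textblob.TextBlob(self.content)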
Example #12
def method1():
    with open("../data/haiku/gen.json") as f:
        r = f.read()

    r = json.loads(r)
    keyWords = getKeywords()
    lemmatized_keywords = []
    for keyword in keyWords:
        lemmatized_keywords.append(textblob.Word(keyword).lemmatize())

    index = 0
    match_found = 0
    for key, value in r.items():
        #value["sentence"] = scramble(value["sentence"],value["features"])
        l = textblob.TextBlob(value["sentence"])

        #print(value["sentence"])
        l1 = l.ngrams(n=1)
        l2 = l.ngrams(n=2)
        l3 = l.ngrams(n=3)
        l1 = [" ".join(i) for i in l1]
        l2 = [" ".join(i) for i in l2]
        l3 = [" ".join(i) for i in l3]
        l = []
        for i in l1:
            l.append(i)
        for i in l2:
            l.append(i)
        for i in l3:
            l.append(i)

        #print(l)
        found_keywords = []
        for word in l:
            w = textblob.Word(word)
            if w.lemmatize() in lemmatized_keywords:
                found_keywords.append(w.lemmatize())

        print(found_keywords)

        actual_keywords = [
            textblob.Word(i).lemmatize() for i in value["features"]
        ]

        print(actual_keywords)

        if found_keywords == actual_keywords or all(
                elem in found_keywords for elem in actual_keywords):
            match_found += 1
        """else:
			print(actual_keywords)
			print(found_keywords)"""
        """index+=1
		if index==10:
			break"""

    acc = match_found / len(r)

    with open("Stats.txt", "a") as f:
        f.write("Accuracy on feature_recognition_2_5.json is: " + str(acc) +
                "\n")

    print("Accuracy = ", acc)
Example #13
import textblob as tb
import sentiment_analysis

text = tb.TextBlob("I want to kill someone.")
verbphrase = []
if 'I' in text.tokenize():
    allpos = text.pos_tags
    for x in allpos:
        if x[1] in [
                'VB', 'VBZ', 'VBP', 'VBD', 'VBN', 'VBG', 'RB', 'RBR', 'RBS'
        ]:
            verbphrase.append(x[0])

print(sentiment_analysis.get_negs(' '.join(verbphrase)))
Example #14
        return 1

    df['airline_sentiment_confidence'] = df['airline_sentiment_confidence'].astype('str')
    df['sentiment'] = df['airline_sentiment'].apply(lambda x: sentiment_to_label(x))
    df = df[['text', 'sentiment']]

    re_attags = re.compile(" @[^ ]* ")
    re_spaces = re.compile(r"\w+]")
    df['text'] = df['text'].apply(lambda x: re_spaces.sub(" ", re_attags.sub(" ", " " + x + " "))[1:-1])
    df = df.drop_duplicates(subset=['text'])
    df.index = df['id'] = range(df.shape[0])

    non_alphanums = re.compile('[^A-Za-z]+')
    def normalize_text(text): return non_alphanums.sub(' ', text).lower().strip()
    df['text_normalized'] = df['text'].map(lambda x: normalize_text(x))
    df['textblob_score'] = df['text_normalized'].map(lambda x: textblob.TextBlob(x).polarity)

    import wordbag_regressor
    print("Train wordbag regressor")
    wordbag_regressor = wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz", tripadvisor_dir)
    # wordbag_regressor = wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz")
    df['wordbag_score'] = wordbag_regressor.predict(df['text'].values)

    import wordhash_regressor
    print("Train wordhash regressor")
    wordhash_regressor = wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz", tripadvisor_dir)
    # wordhash_regressor = wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz")
    df['wordhash_score'] = wordhash_regressor.predict(df['text'].values)

    import wordseq_regressor
    print("Train wordseq regressor")
Example #15
 def test_clean_html(self):
     html = '<b>Python</b> is a widely used <a href="/wiki/General-purpose_programming_language" title="General-purpose programming language">general-purpose</a>, <a href="/wiki/High-level_programming_language" title="High-level programming language">high-level programming language</a>.'
     assert_raises(NotImplementedError, lambda: tb.TextBlob(html, clean_html=True))
Example #16
    re_attags = re.compile(" @[^ ]* ")
    re_spaces = re.compile(r"\w+]")
    df['text'] = df['text'].apply(
        lambda x: re_spaces.sub(" ", re_attags.sub(" ", " " + x + " "))[1:-1])
    df = df.drop_duplicates(subset=['text'])
    df.index = df['id'] = range(df.shape[0])

    non_alphanums = re.compile('[^A-Za-z]+')

    def normalize_text(text):
        return non_alphanums.sub(' ', text).lower().strip()

    df['text_normalized'] = df['text'].map(lambda x: normalize_text(x))
    df['textblob_score'] = df['text_normalized'].map(
        lambda x: textblob.TextBlob(x).polarity)

    import wordbag_regressor
    print("Train wordbag regressor")
    wordbag_regressor = wordbag_regressor.WordbagRegressor(
        "../models/wordbag_model.pkl.gz", tripadvisor_dir)
    #wordbag_regressor= wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz")
    df['wordbag_score'] = wordbag_regressor.predict(df['text'].values)

    import wordhash_regressor
    print("Train wordhash regressor")
    wordhash_regressor = wordhash_regressor.WordhashRegressor(
        "../models/wordhash_model.pkl.gz", tripadvisor_dir)
    #wordhash_regressor= wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz")
    df['wordhash_score'] = wordhash_regressor.predict(df['text'].values)
Example #17
 def test_senences_with_space_before_punctuation(self):
     text = "Uh oh. This sentence might cause some problems. : Now we're ok."
     b = tb.TextBlob(text)
     assert_equal(len(b.sentences), 3)
Example #18
url = 'http://localhost:5984/japon/_design/japon28JuneHashtag/_view/japon28JuneHashtag'
req = urllib2.Request(url)
f = urllib2.urlopen(req)

d = json.loads(f.read())

archivo = open("/home/usrkap/Downloads/ResultadoJaponHashtag.txt",
               "a")  # open the results file in append mode
cont_positives = 0
cont_negatives = 0
cont_neutrals = 0
cont_total = 0

for x in d['rows']:
    a = x['value']
    texto_tweet = textblob.TextBlob(a)

    aux = ''

    if texto_tweet.sentiment.polarity > 0:
        aux = a + ';positive'
        cont_positives = cont_positives + 1
    elif texto_tweet.sentiment.polarity < 0:
        aux = a + ';negative'
        cont_negatives = cont_negatives + 1
    else:
        aux = a + ';neutral'
        cont_neutrals = cont_neutrals + 1

    archivo.write(str((aux.encode("utf-8") + "\n")))
    cont_total = cont_total + 1
Example #19
 def test_raw_sentences(self):
     blob = tb.TextBlob(self.text)
     assert_equal(len(blob.raw_sentences), 19)
     assert_equal(blob.raw_sentences[0], "Beautiful is better than ugly.")
Example #20
 def test_translate_detects_language_by_default(self):
     blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
     assert_equal(blob.translate(), "With full sovereignty")
Example #21
 def test_repr(self):
     blob1 = tb.TextBlob('lorem ipsum')
     if PY2:
         assert_equal(repr(blob1), b"TextBlob(\"{0}\")".format(binary_type('lorem ipsum')))
     else:
         assert_equal(repr(blob1), "TextBlob(\"{0}\")".format('lorem ipsum'))
Example #22
 def test_string_equality(self):
     blob = tb.TextBlob("Textblobs should be equal to strings.")
     assert_equal(blob, "Textblobs should be equal to strings.")
Example #23
 def test_tagging_nonascii(self):
     b = tb.TextBlob('Learn how to make the five classic French mother sauces: '
                     'Béchamel, Tomato Sauce, Espagnole, Velouté and Hollandaise.')
     tags = b.tags
     assert_true(isinstance(tags[0][0], unicode))
Example #24
 def test_string_comparison(self):
     blob = tb.TextBlob("apple")
     assert_true(blob < "banana")
     assert_true(blob > 'aardvark')
Example #25
 def test_np_extractor_defaults_to_fast_tagger(self):
     text = "Python is a high-level scripting language."
     blob1 = tb.TextBlob(text)
     assert_true(isinstance(blob1.np_extractor, FastNPExtractor))
Example #26
 def test_hash(self):
     blob = tb.TextBlob('apple')
     assert_equal(hash(blob), hash('apple'))
     assert_not_equal(hash(blob), hash('banana'))
Example #27
 def test_can_use_different_np_extractors(self):
     e = ConllExtractor()
     text = "Python is a high-level scripting language."
     blob = tb.TextBlob(text)
     blob.np_extractor = e
     assert_true(isinstance(blob.np_extractor, ConllExtractor))
Example #28
 def test_stripped(self):
     blob = tb.TextBlob("Um... well this ain't right.!..")
     assert_equal(blob.stripped, "um well this aint right")
Example #29
 def test_discrete_sentiment(self):
     blob = tb.TextBlob("I feel great today.", analyzer=NaiveBayesAnalyzer())
     assert_equal(blob.sentiment[0], 'pos')
Example #30
df_sum['clean'] = lyr_lemmatized
df_sum.head()

# In[27]:

cleaned = df_sum.clean.to_string()

# In[28]:

# overall sentiment of all lyrics
# textblob uses a lookup dictionary for sentiment and subjectivity
TextBlob(cleaned).sentiment

# In[29]:

tb = textblob.TextBlob(df_sum.clean[0])
tb.sentiment_assessments

# In[30]:

# naive bayes sentiment classification, sentiment probabilities
nb = textblob.en.sentiments.NaiveBayesAnalyzer()
nb.analyze(df_sum.clean[0])
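
# For reference, NaiveBayesAnalyzer.analyze() returns textblob's Sentiment named tuple
# (classification, p_pos, p_neg); the first call also trains the analyzer on the NLTK
# movie-reviews corpus, so it can take a while.
result = nb.analyze(df_sum.clean[0])
print(result.classification, result.p_pos, result.p_neg)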

# In[31]:


def sentiment_score(clean_notes):
    # `analyzer` is assumed to be a VADER SentimentIntensityAnalyzer created earlier in the notebook
    score = analyzer.polarity_scores(clean_notes)
    weight = score['compound']
    if weight >= 0.1: