rateAlpha = alpha(rating, mini, middle, maxn, rateAlpha, length)

# The record of reviews.
titles = np.array(npdata[:, 12])
contents = np.array(npdata[:, 13])
titlePositive = np.empty(length, dtype=float)
contentPositive = np.empty(length, dtype=float)
titleSubjectivity = np.empty(length, dtype=float)
contentSubjectivity = np.empty(length, dtype=float)

for i in range(0, length):
    temp = textblob.TextBlob(str(titles[i]))
    titlePositive[i] = temp.sentiment.polarity
    titleSubjectivity[i] = temp.sentiment.subjectivity
    temp = textblob.TextBlob(str(contents[i]))
    contentPositive[i] = temp.sentiment.polarity
    contentSubjectivity[i] = temp.sentiment.subjectivity

# Function for calculating the score of reviews.
def markReviews(pos, sub, score, length, x, y):
    for i in range(0, length):
        if pos[i] >= 0:
            score[i] = pow(x, pos[i]) / (sub[i] + y)
        else:
            score[i] = pos[i] * pow(x, pos[i]) / (abs(pos[i]) * y * (sub[i] + 1))
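# A minimal usage sketch with hypothetical review strings and assumed weights
# x=2, y=1, showing how TextBlob polarity/subjectivity arrays feed markReviews.
_demo_reviews = ["Great product, works perfectly.", "Terrible, broke after one day."]
_demo_pos = np.array([textblob.TextBlob(r).sentiment.polarity for r in _demo_reviews])
_demo_sub = np.array([textblob.TextBlob(r).sentiment.subjectivity for r in _demo_reviews])
_demo_score = np.empty(len(_demo_reviews), dtype=float)
markReviews(_demo_pos, _demo_sub, _demo_score, len(_demo_reviews), 2, 1)
# Higher scores correspond to more positive, less subjective reviews.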
def test_np_extractor_is_shared_among_instances(self):
    blob1 = tb.TextBlob("This is one sentence")
    blob2 = tb.TextBlob("This is another sentence")
    assert_true(blob1.np_extractor is blob2.np_extractor)
def test_can_use_different_sentanalyzer(self):
    blob = tb.TextBlob("I love this car", analyzer=NaiveBayesAnalyzer())
    assert_true(isinstance(blob.analyzer, NaiveBayesAnalyzer))
def test_words_includes_apostrophes_in_contractions(self):
    blob = tb.TextBlob("Let's test this.")
    assert_equal(blob.words, tb.WordList(['Let', "'s", "test", "this"]))
    blob2 = tb.TextBlob("I can't believe it's not butter.")
    assert_equal(blob2.words, tb.WordList(['I', 'ca', "n't", "believe", 'it', "'s", "not", "butter"]))
def test_pos_tags_includes_one_letter_articles(self):
    blob = tb.TextBlob("This is a sentence.")
    assert_equal(blob.pos_tags[2][0], 'a')
def test_sentiment_of_foreign_text(self):
    blob = tb.TextBlob(u'Nous avons cherch\xe9 un motel dans la r\xe9gion de '
                       'Madison, mais les motels ne sont pas nombreux et nous avons '
                       'finalement choisi un Motel 6, attir\xe9s par le bas '
                       'prix de la chambre.')
    assert_true(isinstance(blob.sentiment[0], float))
def test_len(self):
    blob = tb.TextBlob('lorem ipsum')
    assert_equal(len(blob), len('lorem ipsum'))
def parse_graf(doc_id, graf_text, base_idx):
    """CORE ALGORITHM: parse and markup sentences in the given paragraph"""
    global DEBUG
    global POS_KEEPS, POS_LEMMA, TAGGER

    markup = []
    new_base_idx = base_idx

    for sent in textblob.TextBlob(graf_text).sentences:
        graf = []
        digest = hashlib.sha1()

        tagged_sent = TAGGER.tag(str(sent))
        tag_idx = 0
        raw_idx = 0

        if DEBUG:
            print(tagged_sent)

        while tag_idx < len(tagged_sent):
            pos_tag = tagged_sent[tag_idx]
            word = WordNode(word_id=0, raw=pos_tag[0], root=pos_tag[0],
                            pos=pos_tag[1], keep=0, idx=new_base_idx)

            if DEBUG:
                print("IDX", tag_idx, pos_tag)
                print("reg", is_not_word(pos_tag[0]))
                print("  ", raw_idx, len(sent.words), sent.words)
                print(graf)

            if is_not_word(pos_tag[0]) or (pos_tag[1] == "SYM"):
                parsed_raw = pos_tag[0]
                pos_family = '.'
                word = word._replace(pos=pos_family)
            elif raw_idx < len(sent.words):
                parsed_raw = sent.words[raw_idx]
                pos_family = pos_tag[1].lower()[0]
                raw_idx += 1

            word = word._replace(raw=str(parsed_raw))

            if pos_family in POS_LEMMA:
                word = word._replace(root=str(
                    parsed_raw.singularize().lemmatize(pos_family)).lower())
            elif pos_family != '.':
                word = word._replace(root=str(parsed_raw).lower())
            else:
                word = word._replace(root=str(parsed_raw))

            if pos_family in POS_KEEPS:
                word = word._replace(word_id=get_word_id(word.root), keep=1)

            digest.update(word.root.encode("utf-8"))

            # schema: word_id, raw, root, pos, keep, idx
            if DEBUG:
                print(word)

            graf.append(list(word))

            new_base_idx += 1
            tag_idx += 1

        markup.append(ParsedGraf(id=doc_id, sha1=digest.hexdigest(), graf=graf))

    return markup, new_base_idx
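# Hypothetical scaffolding so parse_graf can be exercised in isolation. Every
# name below (WordNode, ParsedGraf, POS_KEEPS, POS_LEMMA, get_word_id,
# is_not_word, the NLTKTagger choice) is an assumption, not the original
# module's definitions; the namedtuples only mirror the fields the function uses.
import collections
import hashlib
import re
import textblob
from textblob.taggers import NLTKTagger

WordNode = collections.namedtuple("WordNode", "word_id raw root pos keep idx")
ParsedGraf = collections.namedtuple("ParsedGraf", "id sha1 graf")

DEBUG = False
POS_KEEPS = set("vnj")   # assumed: keep verbs, nouns, adjectives
POS_LEMMA = set("vn")    # assumed: lemmatize verbs and nouns
TAGGER = NLTKTagger()    # assumed tagger; the original may use a different one

_word_ids = {}

def get_word_id(root):
    """Assign a stable integer id per unique root form (assumed behavior)."""
    return _word_ids.setdefault(root, len(_word_ids) + 1)

def is_not_word(token):
    """Treat tokens with no alphanumeric characters as non-words (assumed)."""
    return re.search(r"\w", token) is None

# Example call:
# markup, next_idx = parse_graf("doc1", "Minimal generating sets are used.", 0)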
# (excerpt: the if-branch paired with the else below is not shown)
    time.sleep(timeToSleep + 1)
else:
    time.sleep(1)

if i % 100 == 0:
    print "on id number: ", i
    sys.stdout.flush()

i = i + 1
counter = counter + 1

try:
    for status in tweepy.Cursor(api.user_timeline, user_id=idno,
                                since_id=carsonDropsOutTweetId).items(20):
        statusText = status.text.lower()
        for person in personSentimentList:
            for keyword in person[2]:
                if keyword in statusText:
                    tb = textblob.TextBlob(statusText)
                    person[1].append((tb.sentiment.polarity, status.id))
                    mentionFlag = True
                    break
        for issue in issueSentimentList:
            for keyword in issue[2]:
                if keyword in statusText:
                    tb = textblob.TextBlob(statusText)
                    issue[1].append((tb.sentiment.polarity, status.id))
                    mentionFlag = True
                    break
except KeyboardInterrupt:
    raise
except:
    print sys.exc_info()[0]
    sys.stdout.flush()
import tweepy
import textblob
import os
from dotenv import load_dotenv

load_dotenv()
consumer_key = os.getenv("consumer_key")
consumer_secret = os.getenv("consumer_secret")
access_token = os.getenv("access_token")
access_secret = os.getenv("access_secret")

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)

searched = input()
public_tweets = api.search(searched)
for tweet in public_tweets:
    print(tweet.text)
    analysis = textblob.TextBlob(tweet.text)
    print(analysis.sentiment)
    print('')
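# Note: this snippet assumes Tweepy 3.x, where the standard search endpoint is
# exposed as api.search(...). In Tweepy 4.x the method was renamed, so the
# equivalent call would be:
#   public_tweets = api.search_tweets(searched)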
def __init__(self, *args, **kwargs):
    models.Model.__init__(self, *args, **kwargs)
    self.blob = textblob.TextBlob(self.content)
def method1():
    with open("../data/haiku/gen.json") as f:
        r = f.read()
        r = json.loads(r)
    keyWords = getKeywords()
    lemmatized_keywords = []
    for keyword in keyWords:
        lemmatized_keywords.append(textblob.Word(keyword).lemmatize())
    index = 0
    match_found = 0
    for key, value in r.items():
        #value["sentence"] = scramble(value["sentence"], value["features"])
        l = textblob.TextBlob(value["sentence"])
        #print(value["sentence"])
        l1 = l.ngrams(n=1)
        l2 = l.ngrams(n=2)
        l3 = l.ngrams(n=3)
        l1 = [" ".join(i) for i in l1]
        l2 = [" ".join(i) for i in l2]
        l3 = [" ".join(i) for i in l3]
        l = []
        for i in l1:
            l.append(i)
        for i in l2:
            l.append(i)
        for i in l3:
            l.append(i)
        #print(l)
        found_keywords = []
        for word in l:
            w = textblob.Word(word)
            if w.lemmatize() in lemmatized_keywords:
                found_keywords.append(w.lemmatize())
        print(found_keywords)
        actual_keywords = [textblob.Word(i).lemmatize() for i in value["features"]]
        print(actual_keywords)
        if found_keywords == actual_keywords or all(
                elem in found_keywords for elem in actual_keywords):
            match_found += 1
        """else:
            print(actual_keywords)
            print(found_keywords)"""
        """index += 1
        if index == 10:
            break"""
    acc = match_found / len(r)
    with open("Stats.txt", "a") as f:
        f.write("Accuracy on feature_recognition_2_5.json is: " + str(acc) + "\n")
    print("Accuracy = ", acc)
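# For reference: TextBlob.ngrams returns a list of WordList objects, e.g. (shown
# for an assumed sample sentence)
#   textblob.TextBlob("an old silent pond").ngrams(n=2)
#   # -> [WordList(['an', 'old']), WordList(['old', 'silent']), WordList(['silent', 'pond'])]
# which is why each n-gram above is flattened back into a string with " ".join(i)
# before lemma matching.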
import textblob as tb
import sentiment_analysis

text = tb.TextBlob("I want to kill someone.")
if 'I' in text.tokenize():
    allpos = text.pos_tags
    verbphrase = []
    for x in allpos:
        if x[1] in ['VB', 'VBZ', 'VBP', 'VBD', 'VBN', 'VBG', 'RB', 'RBR', 'RBS']:
            verbphrase.append(x[0])
    print(sentiment_analysis.get_negs(' '.join(verbphrase)))
    return 1

df['airline_sentiment_confidence'] = df['airline_sentiment_confidence'].astype('str')
df['sentiment'] = (df['airline_sentiment']).apply(lambda x: sentiment_to_label(x))
df = df[['text', 'sentiment']]

re_attags = re.compile(" @[^ ]* ")
re_spaces = re.compile("\w+]")
df['text'] = df['text'].apply(lambda x: re_spaces.sub(" ", re_attags.sub(" ", " " + x + " "))[1:-1])
df = df.drop_duplicates(subset=['text'])
df.index = df['id'] = xrange(df.shape[0])

non_alphanums = re.compile('[^A-Za-z]+')

def normalize_text(text):
    return non_alphanums.sub(' ', text).lower().strip()

df['text_normalized'] = df['text'].map(lambda x: normalize_text(x))
df['textblob_score'] = df['text_normalized'].map(lambda x: textblob.TextBlob(x).polarity)

import wordbag_regressor
print "Train wordbag regressor"
wordbag_regressor = wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz", tripadvisor_dir)
#wordbag_regressor= wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz")
df['wordbag_score'] = wordbag_regressor.predict(df['text'].values)

import wordhash_regressor
print "Train wordhash regressor"
wordhash_regressor = wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz", tripadvisor_dir)
#wordhash_regressor= wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz")
df['wordhash_score'] = wordhash_regressor.predict(df['text'].values)

import wordseq_regressor
print "Train wordseq regressor"
def test_clean_html(self):
    html = '<b>Python</b> is a widely used <a href="/wiki/General-purpose_programming_language" title="General-purpose programming language">general-purpose</a>, <a href="/wiki/High-level_programming_language" title="High-level programming language">high-level programming language</a>.'
    assert_raises(NotImplementedError, lambda: tb.TextBlob(html, clean_html=True))
re_attags = re.compile(" @[^ ]* ")
re_spaces = re.compile("\w+]")
df['text'] = df['text'].apply(
    lambda x: re_spaces.sub(" ", re_attags.sub(" ", " " + x + " "))[1:-1])
df = df.drop_duplicates(subset=['text'])
df.index = df['id'] = range(df.shape[0])

non_alphanums = re.compile('[^A-Za-z]+')

def normalize_text(text):
    return non_alphanums.sub(' ', text).lower().strip()

df['text_normalized'] = df['text'].map(lambda x: normalize_text(x))
df['textblob_score'] = df['text_normalized'].map(
    lambda x: textblob.TextBlob(x).polarity)

import wordbag_regressor
print("Train wordbag regressor")
wordbag_regressor = wordbag_regressor.WordbagRegressor(
    "../models/wordbag_model.pkl.gz", tripadvisor_dir)
#wordbag_regressor= wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz")
df['wordbag_score'] = wordbag_regressor.predict(df['text'].values)

import wordhash_regressor
print("Train wordhash regressor")
wordhash_regressor = wordhash_regressor.WordhashRegressor(
    "../models/wordhash_model.pkl.gz", tripadvisor_dir)
#wordhash_regressor= wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz")
df['wordhash_score'] = wordhash_regressor.predict(df['text'].values)
def test_senences_with_space_before_punctuation(self):
    text = "Uh oh. This sentence might cause some problems. : Now we're ok."
    b = tb.TextBlob(text)
    assert_equal(len(b.sentences), 3)
url = 'http://localhost:5984/japon/_design/japon28JuneHashtag/_view/japon28JuneHashtag'
req = urllib2.Request(url)
f = urllib2.urlopen(req)
d = json.loads(f.read())

# Open the output file in append mode.
archivo = open("/home/usrkap/Downloads/ResultadoJaponHashtag.txt", "a")
cont_positives = 0
cont_negatives = 0
cont_neutrals = 0
cont_total = 0

for x in d['rows']:
    a = x['value']
    texto_tweet = textblob.TextBlob(a)
    aux = ''
    if texto_tweet.sentiment.polarity > 0:
        aux = a + ';positive'
        cont_positives = cont_positives + 1
    elif texto_tweet.sentiment.polarity < 0:
        aux = a + ';negative'
        cont_negatives = cont_negatives + 1
    else:
        aux = a + ';neutral'
        cont_neutrals = cont_neutrals + 1
    archivo.write(str((aux.encode("utf-8") + "\n")))
    cont_total = cont_total + 1
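# Note: urllib2 is Python 2 only; under Python 3 the equivalent calls would be
# urllib.request.Request and urllib.request.urlopen.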
def test_raw_sentences(self):
    blob = tb.TextBlob(self.text)
    assert_equal(len(blob.raw_sentences), 19)
    assert_equal(blob.raw_sentences[0], "Beautiful is better than ugly.")
def test_translate_detects_language_by_default(self):
    blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
    assert_equal(blob.translate(), "With full sovereignty")
def test_repr(self):
    blob1 = tb.TextBlob('lorem ipsum')
    if PY2:
        assert_equal(repr(blob1), b"TextBlob(\"{0}\")".format(binary_type('lorem ipsum')))
    else:
        assert_equal(repr(blob1), "TextBlob(\"{0}\")".format('lorem ipsum'))
def test_string_equality(self):
    blob = tb.TextBlob("Textblobs should be equal to strings.")
    assert_equal(blob, "Textblobs should be equal to strings.")
def test_tagging_nonascii(self):
    b = tb.TextBlob('Learn how to make the five classic French mother sauces: '
                    'Béchamel, Tomato Sauce, Espagnole, Velouté and Hollandaise.')
    tags = b.tags
    assert_true(isinstance(tags[0][0], unicode))
def test_string_comparison(self):
    blob = tb.TextBlob("apple")
    assert_true(blob < "banana")
    assert_true(blob > 'aardvark')
def test_np_extractor_defaults_to_fast_tagger(self):
    text = "Python is a high-level scripting language."
    blob1 = tb.TextBlob(text)
    assert_true(isinstance(blob1.np_extractor, FastNPExtractor))
def test_hash(self):
    blob = tb.TextBlob('apple')
    assert_equal(hash(blob), hash('apple'))
    assert_not_equal(hash(blob), hash('banana'))
def test_can_use_different_np_extractors(self):
    e = ConllExtractor()
    text = "Python is a high-level scripting language."
    blob = tb.TextBlob(text)
    blob.np_extractor = e
    assert_true(isinstance(blob.np_extractor, ConllExtractor))
def test_stripped(self):
    blob = tb.TextBlob("Um... well this ain't right.!..")
    assert_equal(blob.stripped, "um well this aint right")
def test_discrete_sentiment(self):
    blob = tb.TextBlob("I feel great today.", analyzer=NaiveBayesAnalyzer())
    assert_equal(blob.sentiment[0], 'pos')
df_sum['clean'] = lyr_lemmatized
df_sum.head()

# In[27]:

cleaned = df_sum.clean.to_string()

# In[28]:

# overall sentiment of all lyrics
# textblob uses a lookup dictionary for sentiment and subjectivity
TextBlob(cleaned).sentiment

# In[29]:

tb = textblob.TextBlob(df_sum.clean[0])
tb.sentiment_assessments

# In[30]:

# naive bayes sentiment classification, sentiment probabilities
nb = textblob.en.sentiments.NaiveBayesAnalyzer()
nb.analyze(df_sum.clean[0])

# In[31]:

def sentiment_score(clean_notes):
    score = analyzer.polarity_scores(clean_notes)
    weight = score['compound']
    if weight >= 0.1: