def test_correct(self):
    """correct() returns a new, spelling-corrected Sentence instance."""
    # Plain one-line sentence.
    misspelled = tb.Sentence("I havv bad speling.")
    corrected = misspelled.correct()
    assert_true(isinstance(corrected, tb.Sentence))
    assert_equal(corrected, tb.Sentence("I have bad spelling."))
    # Embedded newline must survive correction.
    misspelled = tb.Sentence("I havv \ngood speling.")
    corrected = misspelled.correct()
    assert_true(isinstance(corrected, tb.Sentence))
    assert_equal(corrected, tb.Sentence("I have \ngood spelling."))
def _get_words_features(message, remove_stopwords=False):
    """Return the word-count features of *message*.

    When *remove_stopwords* is true, English stop words (per NLTK) are
    stripped from the space-split message before counting.
    """
    if remove_stopwords:
        stop_words = stopwords.words('english')
        kept = [token for token in message.split(' ') if token not in stop_words]
        message = " ".join(kept)
    return textblob.Sentence(message).word_counts
def on_data(self, data):
    """Stream-listener callback: persist a raw tweet to MongoDB, extract and
    clean its text, run subject extraction, and push volume/sentiment tallies
    to two Firestore documents.

    Always returns True (even on error) so the stream keeps running.
    NOTE(review): indentation below is reconstructed from a whitespace-mangled
    source; statement order and tokens are unchanged.
    """
    try:
        # This is where we start getting tweets.
        # NOTE(review): a new Mongo client is created on every callback —
        # consider hoisting to __init__.
        client=MongoClient(MONGO_HOST)
        db=client.twitterdb
        print(type(data))
        ok=json.loads(data)
        print(type(ok))
        # Store the raw tweet document as-is.
        db.tweets.insert(ok)
        i=ok
        # Prefer the full (extended) text of a retweet when available.
        if (u'retweeted_status' in i.keys()):
            try:
                tweet = i['retweeted_status']['extended_tweet']["full_text"]
            except:  # NOTE(review): bare except — narrow to KeyError.
                tweet = i['retweeted_status']['text']
            #print("new tweet"+ tweet)
        else:
            if(u'extended_tweet' in i.keys()):
                tweet=i['extended_tweet']['full_text']
            else:
                tweet=i['text']
        # Replace emoji with ":name:" codes, keep the pre-stripped copy in y.
        tweet=emoji.demojize(tweet)
        y=tweet
        # demojise emoji and remove hashtags and '@'
        # Strip skin-tone modifier codes, newlines, hashtags, mentions,
        # possessive "'s", URLs, and the colons left by demojize.
        tweet=re.sub(r'_dark_skin_tone','',tweet)
        tweet=re.sub(r'\n','',tweet)
        tweet=re.sub(r'_light_skin_tone','',tweet)
        tweet=re.sub(r'_medium-dark_tone','',tweet)
        tweet=re.sub(r'_medium-light_tone','',tweet)
        tweet=re.sub(r'_medium_tone','',tweet)
        tweet=re.sub(r'#','',tweet)
        tweet=re.sub(r'@', '', tweet)
        tweet=re.sub(r"'s", '', tweet)
        tweet=re.sub(r'http.?://[^\s]+[\s]?', '', tweet)
        tweet=re.sub(r':','',tweet)
        # NOTE(review): spacy.load on every tweet is very expensive — load
        # the model once at module/instance level.
        parser = spacy.load('en', disable=['ner','textcat'])
        parse = parser(str(tweet))
        # findSVAOS function to get split sentences
        ans=(findSVAOs(parse))
        print("tweet is "+tweet)
        #print(ans)
        # Fallback when no SVO triples were found.
        # NOTE(review): list(tweet) splits the tweet into characters —
        # presumably [tweet] was intended; confirm against findsub().
        if(not ans):
            ans=list(tweet)
            ans.append('')
        final_list=[]
        final_list=findsub(ans,tweet,y)
        print(final_list)
        # the next 7-8 line were to find and predict seats for pune. This is
        # an extra thing, you can ignore it.
        pune_sub=''
        pune_sub=punetweets(tweet,y)
        pune_dict=db.pune.find_one({"_id":10})
        db.pune.update_one({ "_id": 10 },{ '$inc': { str(pune_sub): 1 } }, upsert=False)
        # write json for each day: append the raw payload to a per-day file.
        date_today=date.today()
        with open(self.fetched_tweets_filename+str(date_today)+".json", 'a') as tf:
            tf.write(data+",")
        # get documents from firebase
        doc_ref = db1.collection(u'data').document(u'volume')
        try:
            doc = doc_ref.get()
            print(u'Document data: {}'.format(doc.to_dict()))
        except google.cloud.exceptions.NotFound:
            # NOTE(review): if NotFound fires, doc is unbound and the next
            # line raises NameError (caught by the outer BaseException).
            print(u'No such document!')
        doc2=doc.to_dict()
        #print(type(doc2))
        doc_ref = db1.collection(u'data').document(u'sentiment')
        try:
            doc = doc_ref.get()
            print(u'Document data: {}'.format(doc.to_dict()))
        except google.cloud.exceptions.NotFound:
            print(u'No such document!')
        doc1=doc.to_dict()
        # to do the updation of documents to firebase: bump the volume count
        # for each extracted subject and a positive/negative sentiment count.
        for j in range(len(final_list[1])):
            doc2[final_list[1][j]]+=1;
            c=textblob.Sentence(final_list[0][j])
            senti=c.sentiment.polarity
            print(senti)
            # NOTE(review): thresholds look off — polarity in (0, 0.2)
            # counts as negative, and exactly 0.2 is skipped entirely.
            if(senti>0.2):
                doc1[final_list[1][j]+"_positive"]+=1
            elif(senti==0):
                continue
            elif(senti<0.2):
                doc1[final_list[1][j]+"_negative"]+=1
            else:
                continue
        db1.collection(u'data').document(u'volume').set(doc2)
        db1.collection(u'data').document(u'sentiment').set(doc1)
        return True
    except BaseException as e:
        # NOTE(review): BaseException also swallows KeyboardInterrupt /
        # SystemExit — Exception would be safer.
        print("Error on_data %s" % str(e))
        return True
def test_translate(self, mock_translate):
    """translate() wraps the (mocked) translation result in a Sentence."""
    mock_translate.return_value = 'Esta es una frase.'
    sentence = tb.Sentence("This is a sentence.")
    result = sentence.translate(to="es")
    assert_true(isinstance(result, tb.Sentence))
    assert_equal(result, "Esta es una frase.")
def setUp(self):
    """Create the Sentence fixture shared by the tests in this case."""
    self.raw_sentence = (
        'Any place with frites and Belgian beer has my vote.')
    self.sentence = tb.Sentence(self.raw_sentence)
def test_translate(self):
    """translate() returns a Sentence holding the Spanish translation."""
    sentence = tb.Sentence("This is a sentence.")
    result = sentence.translate(to="es")
    assert_true(isinstance(result, tb.Sentence))
    assert_equal(result, "Esta es una frase.")
# NOTE(review): this span is reconstructed from a whitespace-mangled source;
# the leading loop references `i`, `line`, `ws`, `wb` from an enclosing scope
# that is outside this view, so its indentation is a best guess — verify.
# Column 1 gets the first token; the rest are joined into column 0.
for j in range(len(datatoWrite[i])):
    if j == 0:
        ws.write(i, 1, datatoWrite[i][j])
        continue
    else:
        line = line + ' ' + datatoWrite[i][j]
ws.write(i, 0, line)
wb.save('example.xls')
# NOTE(review): mid-file imports — convention is to move these to the top.
import math
import textblob as tb
# Wrap each final[] string in a TextBlob Sentence for the TF-IDF helpers.
for y in range(0, len(final)):
    temp = '"""' + final[y] + '"""'
    final[y] = tb.Sentence(temp)
def tf(word, blob):
    # Term frequency: occurrences of `word` divided by the blob's word count.
    return blob.words.count(word) / len(blob.words)
def n_containing(word, bloblist):
    # Number of blobs in `bloblist` that contain `word` at least once.
    return sum(1 for blob in bloblist if word in blob)
def idf(word, bloblist):
    # Inverse document frequency; the +1 avoids division by zero when no
    # blob contains the word.
    return math.log(len(bloblist) / (1 + n_containing(word, bloblist)))
# NOTE(review): definition truncated at the end of this view — body not shown.
def tfidf(word, blob, bloblist):