Example 1
 def test_correct(self):
     blob = tb.Sentence("I havv bad speling.")
     assert_true(isinstance(blob.correct(), tb.Sentence))
     assert_equal(blob.correct(), tb.Sentence("I have bad spelling."))
     blob = tb.Sentence("I havv \ngood speling.")
     assert_true(isinstance(blob.correct(), tb.Sentence))
     assert_equal(blob.correct(), tb.Sentence("I have \ngood spelling."))
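A minimal standalone sketch (not part of the test above) of what these assertions exercise: Sentence.correct() runs TextBlob's spelling correction and returns a new Sentence rather than a plain string.

import textblob as tb

sentence = tb.Sentence("I havv bad speling.")
fixed = sentence.correct()
print(type(fixed))  # <class 'textblob.blob.Sentence'>
print(fixed)        # I have bad spelling.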
Example 2
import textblob
from nltk.corpus import stopwords


def _get_words_features(message, remove_stopwords=False):
    if remove_stopwords:
        # Drop English stop words before counting
        stop_words = stopwords.words('english')
        message = " ".join(word for word in message.split(' ') if word not in stop_words)
    sentence = textblob.Sentence(message)
    return sentence.word_counts
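A hedged usage sketch for the helper above, assuming the NLTK stopword corpus has been downloaded and that the module imports textblob and nltk.corpus.stopwords as the function requires; the sample message is made up.

import nltk

nltk.download('stopwords')  # one-time download of the stop word list

features = _get_words_features("the offer is a great offer", remove_stopwords=True)
print(dict(features))  # e.g. {'offer': 2, 'great': 1}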
Example 3
    def on_data(self, data):
        try:
            #This is where we start getting tweets
            client = MongoClient(MONGO_HOST)
            db = client.twitterdb
            print(type(data))
            ok = json.loads(data)
            print(type(ok))
            db.tweets.insert_one(ok)  # insert() is deprecated in recent PyMongo versions
            i = ok
            if (u'retweeted_status' in i.keys()):
                try:
                    tweet = i['retweeted_status']['extended_tweet']["full_text"]
                except KeyError:
                    tweet = i['retweeted_status']['text']
                #print("new tweet"+ tweet)
            else:
                if(u'extended_tweet' in i.keys()):
                    tweet=i['extended_tweet']['full_text']
                else:
                    tweet=i['text']
            tweet=emoji.demojize(tweet)
            y=tweet
            #clean the demojized text: strip skin-tone modifiers, hashtags, '@', "'s", URLs and ':'
            tweet=re.sub(r'_dark_skin_tone','',tweet)
            tweet=re.sub(r'\n','',tweet)
            tweet=re.sub(r'_light_skin_tone','',tweet)
            tweet=re.sub(r'_medium-dark_tone','',tweet)
            tweet=re.sub(r'_medium-light_tone','',tweet)
            tweet=re.sub(r'_medium_tone','',tweet)
            tweet=re.sub(r'#','',tweet)
            tweet=re.sub(r'@', '', tweet)
            tweet=re.sub(r"'s", '', tweet)
            tweet=re.sub(r'http.?://[^\s]+[\s]?', '', tweet)
            tweet=re.sub(r':','',tweet)
            # 'en' is the spaCy 2.x shortcut model; loading it here runs once per tweet, which is slow
            parser = spacy.load('en', disable=['ner', 'textcat'])
            parse = parser(str(tweet))
            #findSVAOs splits the parsed tweet into subject-verb-object tuples
            ans = findSVAOs(parse)
            print("tweet is " + tweet)
            #print(ans)
            if not ans:
                # fall back to the whole tweet (list(tweet) would split it into characters)
                ans = [tweet]
                ans.append('')
            final_list = findsub(ans, tweet, y)
            print(final_list)
            #the next few lines find and predict seats for Pune; this is an extra feature and can be ignored
            pune_sub = punetweets(tweet, y)
            pune_dict=db.pune.find_one({"_id":10})
            db.pune.update_one({
              "_id": 10
            },{
              '$inc': {
                str(pune_sub): 1
              }
            }, upsert=False)
            #write json for each day
            date_today=date.today()
            with open(self.fetched_tweets_filename+str(date_today)+".json", 'a') as tf:
                tf.write(data+",")
            #get documents from firebase
            doc_ref = db1.collection(u'data').document(u'volume')
            try:
                doc = doc_ref.get()
                print(u'Document data: {}'.format(doc.to_dict()))
            except google.cloud.exceptions.NotFound:
                print(u'No such document!')
            doc2=doc.to_dict()
            #print(type(doc2))

            doc_ref = db1.collection(u'data').document(u'sentiment')
            try:
                doc = doc_ref.get()
                print(u'Document data: {}'.format(doc.to_dict()))
            except google.cloud.exceptions.NotFound:
                print(u'No such document!')
            doc1=doc.to_dict()
            #update the volume and sentiment documents in Firestore
            for j in range(len(final_list[1])):
                doc2[final_list[1][j]] += 1
                c=textblob.Sentence(final_list[0][j])
                senti=c.sentiment.polarity
                print(senti)
                # polarity above 0.2 counts as positive; any other non-zero polarity
                # below 0.2 is counted as negative (the original thresholds)
                if senti > 0.2:
                    doc1[final_list[1][j] + "_positive"] += 1
                elif senti == 0:
                    continue
                elif senti < 0.2:
                    doc1[final_list[1][j] + "_negative"] += 1
                else:
                    continue
                 
            db1.collection(u'data').document(u'volume').set(doc2)
            db1.collection(u'data').document(u'sentiment').set(doc1)

            return True
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True
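The TextBlob-specific step in the long handler above is the polarity bucketing around textblob.Sentence(...).sentiment.polarity. Below is a minimal sketch of that step in isolation, with made-up sentences and counter keys; the 0.2 positive threshold follows the example, while the negative branch is simplified to strictly negative polarity.

import textblob

sentiment_counts = {"metro_positive": 0, "metro_negative": 0}

for text in ["The metro service is excellent", "The metro is terrible today"]:
    polarity = textblob.Sentence(text).sentiment.polarity
    if polarity > 0.2:
        sentiment_counts["metro_positive"] += 1
    elif polarity < 0:
        sentiment_counts["metro_negative"] += 1

print(sentiment_counts)  # {'metro_positive': 1, 'metro_negative': 1}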
Example 4
 def test_translate(self, mock_translate):
     mock_translate.return_value = 'Esta es una frase.'
     blob = tb.Sentence("This is a sentence.")
     translated = blob.translate(to="es")
     assert_true(isinstance(translated, tb.Sentence))
     assert_equal(translated, "Esta es una frase.")
Example 5
 def setUp(self):
     self.raw_sentence = \
         'Any place with frites and Belgian beer has my vote.'
     self.sentence = tb.Sentence(self.raw_sentence)
Example 6
 def test_translate(self):
     blob = tb.Sentence("This is a sentence.")
     translated = blob.translate(to="es")
     assert_true(isinstance(translated, tb.Sentence))
     assert_equal(translated, "Esta es una frase.")
Example 7

    for j in range(len(datatoWrite[i])):
        if j == 0:
            ws.write(i, 1, datatoWrite[i][j])
            continue
        else:
            line = line + ' ' + datatoWrite[i][j]
    ws.write(i, 0, line)

wb.save('example.xls')

import math
import textblob as tb

for y in range(0, len(final)):
    temp = '"""' + final[y] + '"""'
    final[y] = tb.Sentence(temp)


def tf(word, blob):
    return blob.words.count(word) / len(blob.words)


def n_containing(word, bloblist):
    return sum(1 for blob in bloblist if word in blob)


def idf(word, bloblist):
    return math.log(len(bloblist) / (1 + n_containing(word, bloblist)))


def tfidf(word, blob, bloblist):