def sent_sentiment(sent, analyzer):
    if analyzer == 'textblob':
        from textblob import TextBlob
        testimonial = TextBlob(sent)
        polarity = testimonial.sentiment.polarity
        if polarity > pos_threshold:
            # print 'positive'
            return 1
        elif polarity < neg_threshold:
            # print 'negative'
            return -1
        else:
            # print 'neutral'
            return 0
    elif analyzer == 'vader':
        from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
        try:
            vs = vaderSentiment(sent)
        except:
            vs = vaderSentiment(sent.encode('utf8'))
        if vs['pos'] > vs['neg'] and vs['pos'] > vs['neu']:
            return 1
        elif vs['neg'] > vs['neu']:
            return -1
        else:
            return 0
    else:
        raise ValueError("Got incorrect analyzer type: %s" % analyzer)
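A minimal usage sketch for sent_sentiment (not from the original source), assuming pos_threshold and neg_threshold are module-level constants and that the old function-style VADER API imported inside the function is installed; the threshold values below are illustrative:

# Illustrative module-level thresholds assumed by sent_sentiment.
pos_threshold = 0.1
neg_threshold = -0.1

if __name__ == '__main__':
    for s in ["I love this!", "This is terrible.", "It is a chair."]:
        print(sent_sentiment(s, 'textblob'))  # 1, -1, or 0
        print(sent_sentiment(s, 'vader'))     # 1, -1, or 0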
def run_twitter_search(keyword, output_file):
    print(keyword.upper())
    counter = 0
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords([str(keyword)]) # define the keywords to search for
        # tso.set_language('en') # restrict results to English tweets
        tso.set_include_entities(True) # include entity information
        tso.set_geocode(45.551279, -92.586955, 530, imperial_metric=True)

        # 'ts' is assumed to be a TwitterSearch client created elsewhere with API credentials.
        for tweet in ts.search_tweets_iterable(tso):
            counter = counter + 1
            search_term = keyword
            username = "******" if tweet['user']['screen_name'] is None else tweet['user']['screen_name']
            text = "NONE" if tweet['text'] is None else tweet['text']
            place = "NONE" if tweet['place'] is None else tweet['place']
            if (tweet['coordinates'] is not None):
                lat = tweet['coordinates']['coordinates'][1]
                lng = tweet['coordinates']['coordinates'][0]
            elif (tweet['place'] is not None):
                place_coordinates = tweet['place']['bounding_box']['coordinates']
                sum_lat = 0
                sum_lng = 0
                for pair in place_coordinates[0]:
                    sum_lat += pair[1]
                    sum_lng += pair[0]
                lat = sum_lat / len(place_coordinates[0])
                lng = sum_lng / len(place_coordinates[0])
                place = tweet['place']['full_name']
            else:
                lat = "NONE"
                lng = "NONE"
            location = "NONE" if tweet['user']['location'] is None else tweet['user']['location']
            created_at = "NONE" if tweet['created_at'] is None else tweet['created_at']
            description = "NONE" if tweet['user']['description'] is None else tweet['user']['description']
            verified = "NONE" if tweet['user']['verified'] is None else str(tweet['user']['verified'])
            sentiment_score = vaderSentiment(text.encode('utf-8'))
            compound_sentiment = sentiment_score['compound']
            description_sentiment = vaderSentiment(description.encode('utf-8'))['compound']
            try:
                # 'df' (a pandas DataFrame) and write_to_excel() are assumed to be defined at module level.
                df.loc[len(df)] = [search_term, username, text, lat, lng, location, created_at, place, description, verified, sentiment_score, compound_sentiment, description_sentiment]
                if((len(df) % 200) == 0):
                    write_to_excel(output_file, 'Sheet1', df)
                    print("_%s %s tweets/%s total" % (counter, keyword.upper(), len(df)))
            except:
                write_to_excel(output_file, str(keyword), df)
            if(counter == 10000):
                return
        write_to_excel(output_file, str(keyword), df)
        print("_______%s tweets saved" % (len(df)))

    except TwitterSearchException as e: # handle any errors raised by TwitterSearch
        print(e)
Example #3
def determineSentiment(track_lyrics):
    # VADER sentiment calculation and database storage
    try:
        track_lyrics = track_lyrics.encode('utf-8')
        sentiment = vaderSentiment(track_lyrics)
    except UnicodeEncodeError:
        track_lyrics = track_lyrics.encode('ascii', 'ignore')
        sentiment = vaderSentiment(track_lyrics)
    neg = sentiment['neg']
    neu = sentiment['neu']
    pos = sentiment['pos']
    return (neg, neu, pos)
Example #4
def analyzeSentiment(sentences):

    """
    analyzeSentiment(listOfString) -> listofJSON

    returns a JSON object wrapped in an array, that contains the amount of positive, 
    neutral and negative tweets ready to be processed by charJS

    """

    neu = 0
    pos = 0
    neg = 0 
    
    # determine whether each sentence carries a negative, neutral or positive sentiment
    for sentence in sentences:
        vs = vaderSentiment(sentence)

        if (vs['neg'] > vs['pos']):
            neg+=1
        elif (vs['pos'] > vs['neg']):
            pos+=1
        else:
            neu+=1
   
    #output is in this format so chartJs can use it in the web-App
    return_data = [{'value': neg, 'label': 'Negative Tweets','color': '#D18177'},
    {'value': neu, 'label': 'Neutral Tweets','color': '#9CBABA'},
    {'value': pos, 'label': 'Positive Tweets','color': '#00688B'}]
  
        
    return return_data
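A hedged usage sketch for analyzeSentiment; the import and sample tweets are assumptions, matching the old function-style VADER API used throughout these examples:

# Assumed import; analyzeSentiment expects vaderSentiment to be available at module level.
from vaderSentiment.vaderSentiment import sentiment as vaderSentiment

sample_tweets = ["I love this phone", "Worst service ever", "Meeting at 10am"]
chart_data = analyzeSentiment(sample_tweets)
# chart_data is a list of three dicts with 'value', 'label' and 'color' keys, ready for chartJS.
print(chart_data)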
Example #5
def process_tweet(tweet):
    tweet = json.loads(tweet)
    tweet_text = tweet.get('text')
    trump = word_in_text('trump', tweet_text)
    clinton = word_in_text('clinton', tweet_text)
    if trump is False and clinton is False:
        return 0
    vs = vaderSentiment(tweet_text.encode('utf-8'))
    if vs.get('neu') == 1 or (vs.get('compound') > -.35
                              and vs.get('compound') < .35):
        return 0
    # At this point compound is either >= .35 or <= -.35, so a single comparison classifies it.
    pos = vs.get('compound') >= .35
    post = {
        'trump': trump,
        'clinton': clinton,
        'vader': vs,
        'positive': pos,
        'text': tweet_text
    }

    posts = db.tweets
    post_id = posts.insert_one(post).inserted_id
    count_tweets(True, 'trump')
Example #6
    def process_item(self, item, spider):
        try:
            item['vaderSentiment'] = vaderSentiment(item['lenArticle'])
        except:
            pass
        success_count = 0
        tech_count = 0
        update_count = 0
        partner_count = 0
        contest_count = 0
        tokens = nltk.word_tokenize(item['lenArticle'])
##        tokens = sorted(w for w in set(tokens) if len(w) > 1)
        
        # Iterate over a copy so that removing stop words does not skip tokens.
        for token in list(tokens):
            word = token.lower()
            if word in self.words_to_remove:
                tokens.remove(token)
            if word in self.words_to_rank_success:
                success_count += 1
            if word in self.words_to_rank_tech:
                tech_count += 1
            if word in self.words_to_rank_update:
                update_count += 1
            if word in self.words_to_rank_partner:
                partner_count += 1
            if word in self.words_to_rank_contest:
                contest_count += 1
                
        item['lenArticle'] = len(tokens)
        fdist = nltk.FreqDist(tokens)
        item['fdist'] = fdist.most_common(50)
        item['topic'] = {'success':success_count,'technology':tech_count,'update':update_count,'partner':partner_count,'contest':contest_count}
        return item
        """Look at the following commented out code, use a similar loop structure and """
def bigrams_unigrams_sentiment(texts):
    all_feats = []
    for text in texts:
        profanity_count = 0
        appreciation_count = 0
        help_count = 0
        for string in appreciation:
            appreciation_count += text.lower().count(string)
        for string in profanity:
            profanity_count += text.lower().count(string)
        for string in seeking_help:
            help_count += text.lower().count(string)
        words = text.split(" ")
        bigrams = []
        for i in xrange(len(words) - 1):
            bigrams.append(words[i] + " " + words[i+1])
        features = Counter(bigrams)
        features += Counter(words)
        vs = vaderSentiment(text)
        #print text, vs
        features['neg_sentiment'] = vs['neg']
        features['neu_sentiment'] = vs['neu']
        features['pos_sentiment'] = vs['pos']
        features['profanity_count'] = profanity_count
        features['appreciation_count'] = appreciation_count
        features['help_count'] = help_count
        features['text_length'] = len(text)
        all_feats.append(features)
    vectorizer = DictVectorizer(sparse=False)
    return vectorizer.fit_transform(all_feats)
Example #8
def get_vadersentiment_polarity(text_to_analyze):
    analyzer = vaderSentiment()
    scores = analyzer.polarity_scores(text_to_analyze)
    neg = scores['neg']
    pos = scores['pos']
    polarity = pos - neg
    return polarity
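Unlike most snippets here, this one calls vaderSentiment() as a constructor and uses its polarity_scores() method, which matches the newer class-based API; a sketch of the import it presumably relies on (an assumption, not shown in the original):

# Assumed import: alias the class-based analyzer to the name used above.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as vaderSentiment

print(get_vadersentiment_polarity("The plot was dull but the acting was great"))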
Example #9
    def parse_dir_contents(self, response):
        # Create an instance of the item; its definition is stored in items.py.
        item = Website()

        # If a div with id='maincontent' exists we are on the CSE website, so no error will be thrown.
        if response.xpath('//div[@id="maincontent"]') != []:
            for sel in response.xpath('//div[@id="maincontent"]'):
                item['name'] = sel.xpath('h1/text()').extract()
                temp = []
                for ptag in sel.xpath('p'):
                    temp += ptag.xpath('text()').extract()
        else:
            item["name"] = response.xpath('//h1/text()').extract()
            temp = []
            for ptag in response.xpath('//p'):
                temp += ptag.xpath('text()').extract()

        item['description'] = ""
        for i in temp:
            item['description'] += i
        item['lenArticle'] = len(item['description'].split())
        # Compute the sentiment of the current article and add it to the item.
        item['vaderSentiment'] = vaderSentiment(item['description'])
        # Take the first text node from the footer's right-hand p tag and slice out the date.
        dateText = response.xpath('//div[@id="footer"]/p[@class="right"]/text()[1]').extract()
        date = dateText[0]
        item['date'] = date[16:]
        yield item
def avg_review_vaderSentiment(data):
	
	sentiment_totals = defaultdict(int)
	result_categories = set()
	output = []

	for each in data.items():

		results = each[1]['results']
		result_categories.add(results)
		reviews_list = each[1].get('reviews')

		if reviews_list:

			n_reviews = len(reviews_list)
			review_sentiments = [vaderSentiment(x[1].encode('utf8')) for x in reviews_list]

			sentiment_totals[results + '_n_reviews'] += n_reviews
			sentiment_totals[results + '_pos_sentiment'] += sum([i['pos'] for i in review_sentiments])
			sentiment_totals[results + '_neg_sentiment'] += sum([i['neg'] for i in review_sentiments])
			sentiment_totals[results + '_neu_sentiment'] += sum([i['neu'] for i in review_sentiments])
			sentiment_totals[results + '_compound_sentiment'] += sum([i['compound'] for i in review_sentiments])

	for result in result_categories:
		t = (result,
		     sentiment_totals[result + '_n_reviews'],
		     float(sentiment_totals[result + '_pos_sentiment']) / float(sentiment_totals[result + '_n_reviews']),
		     float(sentiment_totals[result + '_neg_sentiment']) / float(sentiment_totals[result + '_n_reviews']),
		     float(sentiment_totals[result + '_compound_sentiment']) / float(sentiment_totals[result + '_n_reviews']))
		output.append(t)

	return output
def max_review_vaderSentiment(data, n, min_length):

    vader_reviews = {}

    pos_reviews = []
    neg_reviews = []

    for each in data.items():

        reviews_list = each[1].get('reviews')

        if reviews_list:

            review_sentiments = [
                (x[1].encode('utf8'), vaderSentiment(x[1].encode('utf8')))
                for x in reviews_list
                if (len(nltk.word_tokenize(x[1])) >= min_length)
            ]

            for r in review_sentiments:
                vader_reviews[r[0]] = (r[1]['neg'], r[1]['pos'])

    top_neg = sorted(vader_reviews.items(),
                     key=lambda x: x[1][0],
                     reverse=True)[0:n + 1]
    top_pos = sorted(vader_reviews.items(),
                     key=lambda x: x[1][1],
                     reverse=True)[0:n + 1]

    return top_pos, top_neg
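Both avg_review_vaderSentiment and max_review_vaderSentiment index each[1]['results'], each[1].get('reviews') and x[1] for the review text, so the expected input appears to be a dict shaped roughly as below (an illustrative sketch of the assumed structure, not the original data):

# Hypothetical input shape inferred from how the two review functions index `data`.
data = {
    'product_1': {
        'results': 'recalled',
        'reviews': [
            (5, u"Great build quality, works exactly as advertised."),
            (2, u"Stopped working after a week, very disappointing."),
        ],
    },
    'product_2': {
        'results': 'not_recalled',
        'reviews': [
            (1, u"Terrible purchase, would not buy again."),
        ],
    },
}

top_pos, top_neg = max_review_vaderSentiment(data, n=1, min_length=5)
averages = avg_review_vaderSentiment(data)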
    def VSPolarity(self, corpus):
        self.result = []
        for sentence in corpus:
            vs = vaderSentiment(sentence)
            aList = [vs["neg"], vs["neu"], vs["pos"]]
            self.result.append(aList)
        return np.array(self.result)
    def VSPolarity(self, corpus):
        self.result = []
        for sentence in corpus:
            vs = vaderSentiment(sentence)
            aList = [vs["neg"], vs["neu"], vs["pos"]]
            self.result.append(aList)
        print "Sentiment VSPolarity done!"
        return np.array(self.result)
Example #14
def discretized_vader(text, cutoffs=[-0.150, 0.150]):
    '''
    Give a discreted VADER sentiment score
    '''
    score = vaderSentiment(text)['compound']
    if score < cutoffs[0]:
        return 1
    elif score > cutoffs[1]:
        return 3
    return 2
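A quick usage sketch of discretized_vader with its default cutoffs (the input strings are illustrative):

# 1 = negative, 2 = neutral, 3 = positive under the default cutoffs of [-0.150, 0.150].
print(discretized_vader("I absolutely hated it"))   # likely 1
print(discretized_vader("The package arrived"))     # likely 2
print(discretized_vader("What a wonderful day"))    # likely 3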
Example #15
def getSentiment():
    for Tweet_text in Tweet_Corpus:
        vs = str(vaderSentiment(Tweet_text))
        #print Tweet_text
        #print str(vs)
        cur.execute(
            "INSERT INTO sentiment (tweet_text, sentiment) VALUES (%s, %s)",
            (Tweet_text, vs))
    cur.fetchall()
    db.commit()
def calculate_Sentiment(item):
    reviews = []
    values_list = item[1]

    for review in values_list:
        with open("/path/to/sentiment_results_file", "a") as f:
            sentiment_score = vaderSentiment(review[2])['compound']
            f.write("\n Product ID: " + str(item[0]) + "   Review: " +
                    str(review[2]) + "   User Rating: " + str(review[1]) +
                    "   Sentiment Score: " + str(sentiment_score))
Example #17
def facebookReport(id):
    fb = facebook.GraphAPI(ACCESS_TOKEN)
    d_posts = fb.get_connections(id, 'posts')
    for i in range(10):
        post = d_posts['data'][i]
        postText = post['message']
        vs = vaderSentiment(postText.encode('utf-8'))
        words = postText.split()
        lexicalDiversity = len(set(words)) * 1.0 / len(words)
        print 'Post Text:', removeUnicode(postText)
        print 'Lexical Diversity:', lexicalDiversity
        print 'Sentiment:', vs['compound']
        print '================================================\n'
Example #18
def get_tweets(q, count):
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,CONSUMER_KEY,CONSUMER_SECRET)
    tw = twitter.Twitter(auth=auth)
    
    print "==============================" * 2
    print "\n\t" + '\033[95m' +  str(count) + " Tweets with " + q + '\033[0m' + "\n"
    print "==============================" * 2
    
    tweets = tw.search.tweets(q=q, count=count, lang='en')
    
    texts = []
    counter = 0
    neg_tweet = 0
    pos_tweet = 0
    retweet_counter = 0
    total_senti = 0
    for status in tweets["statuses"]:
        counter += 1
        texts.append(status["text"])
        
        words = []
    
        for w in removeUnicode(status['text']).split():
            words.append(w)
        
        vs = vaderSentiment(status["text"].encode('utf-8'))
        if (vs['compound'] > 0): pos_tweet += 1
        if (vs['compound'] < 0): neg_tweet += 1
        print "\n" + "="* 10 + "Tweet # " + str(counter) + "=" * 10 + "\n"    
        print removeUnicode(status['text'])
        retweet_counter += status["retweet_count"]
        print "\nRetweet count: %d\n" % (status["retweet_count"])
        print "Lexical Diversity: ", 1.0 * len(set(words)) / len(words)
        print "\nSentiment: " + str(vs['compound'])
        total_senti += vs['compound']

    print "==============================" * 2
    print "\nReport over %d tweets: \n" % counter
    print "Total number of retweets: %d\n" % retweet_counter

    print "Negative tweets: " + str(neg_tweet)
    print "\nPositive tweets: " + str(pos_tweet)
    print "\nOverall sentiment: " + str(total_senti/counter)

    words = []
    for t in texts:
        words += [ w for w in removeUnicode(t).split() ]

    lexical_diversity = 1.0 * len(set(words)) / len(words)
    print "\nOverall lexical diversity: ", lexical_diversity
Example #19
def func(std, emojificationParam, text, outFile="out.txt"):
    if text[-4:] == '.txt':
        f = open(text, "r")
        content = f.read()
    else:
        content = text
    output = open(outFile, "w")
    sentences = re.split("[?\.!]", content)
    for sentence in sentences:
        vs = vaderSentiment(sentence)
        text = word_tokenize(sentence)
        pos_tags = pos_tag(text)
        myEpd = epd.epd(0, std)
        emojipos = []
        for sample in range(
                0, int(np.random.rand() * emojificationParam * len(text))):
            a = myEpd.getPosition(text)
            if not a is None:
                emojipos += [int(a)]
        righttags = []
        for position in emojipos:
            if getTag(pos_tags, position) == True:
                if righttags.count(position) < 3:
                    righttags += [position]
        score = vs["compound"] * 50
        final_emojis = []
        emojis = []  #index to list of unicode values
        for k, v in emoji.items():
            for val in v:
                if val >= score - 5 and val <= score + 5:
                    emojis += [k]
        for index in righttags:
            sample = int(np.floor(np.random.rand() * len(emojis)))
            choice = emojis[sample]
            final_emojis += [(index, choice)]
        text = [unicode(t, "utf-8") for t in text]
        for index, e in final_emojis:
            if index == 0:
                samp = np.random.rand()
                if samp > .5:
                    text[index] = text[index] + e
                else:
                    text[index] = e + text[index]
            else:
                text[index] = text[index] + e
        s = " ".join(text)
        if len(s) > 2:
            output.write((s + unicode(". ")).encode("utf-8"))
            print(s + unicode(". ")).encode("utf-8")
    output.close()
    def process(self, tup):
        id = tup.values[0]
        title = tup.values[1].encode('ascii', 'ignore')
        tweet = tup.values[2].encode('ascii', 'ignore')
        # get the good tweet sentence emitted by the Parse bolt 
        # Check if this is the data structure that is emitted by ParseTweet bolt
        # Get the sentiment values for the tweet. 
        vs = vaderSentiment(tweet) 
        # print "\n\t" + str(vs)
        # vs is a dictionary in the form of {'neg': value1, 'neu': value2, 'pos': value3, 'compound': value4}
        # Create a list version of the vs dictionary
        #vslist = list(vs.values())

        # Emit the sentiment values. This will be a set of 4 key-value pairs in JSON format
        # self.emit_many(vs)
        # tuple acknowledgement is handled automatically


        # Get existing running count, running values for positive, negative, neutral & compound sentiment 
        # based on keyword, tmdbid, and increment it by one (for the counter) and the sentiment values from vs.
        #conn = psycopg2.connect(database="filmzz", user="******", password="******", host="localhost", port="5432")
        #cur = conn.cursor()
        currentCount=0
        currentSentiment=[]
        cur.execute("SELECT runningCount from TweetStatistic WHERE tmdbId=%s", [id])
        result=cur.fetchone()
        if result != None:
            currentCount=result[0]
      
        # Build the list in a fixed order; dict iteration order is not guaranteed.
        for key in ('neg', 'neu', 'pos', 'compound'):
            currentSentiment.append(vs[key])
         
        if currentCount == 0:
            # Insert new rows into Tweets Statistic database with values for counter value, Movie Title and tmdbId
            currentCount = currentCount + 1
            cur.execute("INSERT INTO TweetStatistic (tmdbId, title, runningCount, runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment) VALUES (%s,%s,%s,%s,%s,%s,%s)", [id, title, currentCount, currentSentiment[0], currentSentiment[1], currentSentiment[2], currentSentiment[3]])
        else:
            cur.execute("SELECT runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment from TweetStatistic WHERE tmdbId=%s", [id])
            result=cur.fetchone()
            currentCount = currentCount + 1
            if result != None:
               runningNegativeSentiment=currentSentiment[0]+result[0]
               runningNeutralSentiment=currentSentiment[1]+result[1]
               runningPositiveSentiment=currentSentiment[2]+result[2]
               runningCompoundSentiment=currentSentiment[3]+result[3]
               self.log('Updating the statistics for id:%s' %(id))
               # Update the Tweets Statistic database
               cur.execute("UPDATE TweetStatistic SET runningCount = %s, runningNegativeSentiment = %s, runningNeutralSentiment = %s, runningPositiveSentiment=%s, runningCompoundSentiment = %s WHERE tmdbId = %s", (currentCount, runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment, id))
               conn.commit()
Example #21
def print_statistics(tweets):
    for item in tweets:
        print "======================"
        print item["text"]
        print "Favorite Count: ", item["favorite_count"]
        print "Retweets: ", item["retweet_count"]
        print "Lexical Diversity", lexical_diversity(item["text"])
        print "Sentiment Analysis:", vaderSentiment(
            item["text"].encode('utf-8'))['compound']

        # not 100% sure this'll work
        if item["user"]:
            print "Username: "******"user"]["screen_name"].encode('utf-8')
            print "Description: ", item["user"]["description"].encode('utf-8')
            print "Location: ", item["user"]["location"].encode('utf-8')
Example #22
def run_vader_sentiment_analyzer(batch, sentiments, cutoffs=[-0.50, 0.50]):
    '''
    One of the NLP tools used to evaluate sentiment.
    VADER outputs a compound score between -1 and +1 so we must make
    our own wrapper to categorize the scores the way we see fit.
    Cutoffs for negative, neutral, and positive can be optionally specified.
    '''
    for text in batch:
        vs = vaderSentiment(text)
        score = vs['compound']
        if score < cutoffs[0]:
            sentiments['negative'] += 1
        elif score > cutoffs[1]:
            sentiments['positive'] += 1
        else:
            sentiments['neutral'] += 1
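A minimal driver for run_vader_sentiment_analyzer; the sentiments argument is assumed to be a pre-initialized tally dict, since the function only increments existing keys and returns nothing:

# The function mutates `sentiments` in place rather than returning a value.
batch = ["Loved every minute of it", "It was fine", "Utterly disappointing"]
sentiments = {'negative': 0, 'neutral': 0, 'positive': 0}
run_vader_sentiment_analyzer(batch, sentiments)
print(sentiments)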
Example #23
def sentiment_analysis(tweet):
    """Returns the sentiment of the tweet from -1 to 1"""
    vs = vaderSentiment(tweet)
    pos = vs["pos"]
    neg = vs["neg"]
    neu = vs["neu"]

    # find highest scoring sentiment
    highest = max(pos, neg, neu)

    if highest == neg:
        # the negative score dominates, so report it as a negative value
        highest *= -1
    else:
        # positive or neutral dominates, so collapse to 0.0
        highest = 0.0

    return highest
Example #24
def compute_vader_sentiments(posts):
    cnt = 0
    sentiments = []
    for post in posts:
        cnt += 1
        data = {}
        data["post"] = post
        vs = vaderSentiment(post)
        #print vs
        if vs["neg"] >= vs["neu"] and vs["neg"] >= vs["pos"]:
            data["label"] = "neg"
        elif vs["pos"] >= vs["neu"] and vs["pos"] >= vs["neg"]:
            data["label"] = "pos"
        elif vs["neu"] >= vs["neg"] and vs["neu"] >= vs["pos"]:
            data["label"] = "neutral"
        sentiments.append(data)
    print "sentiments computed for %d posts" % cnt
    return sentiments
Example #25
def sentiment_classifier(course_code):
    # delete all previous classifications
    Classification.objects.filter(classifier='VaderSentiment').delete()
    # get messages
    sm_objs = LearningRecord.objects.filter(course_code=course_code)

    for sm_obj in sm_objs:
        message = sm_obj.message.encode('utf-8', 'replace')
        sentiment = "Neutral"
        vs = vaderSentiment(message)
        #print vs, message
        #print "\n\t" + str(vs)
        if (vs['compound'] > 0):
            sentiment = "Positive"
        elif (vs['compound'] < 0):
            sentiment = "Negative"
        # Save Classification
        classification_obj = Classification(xapistatement=sm_obj,classification=sentiment,classifier='VaderSentiment')
        classification_obj.save()
Example #26
def get_sentiment(item, source):
    ''' Get the overall sentiment of the videos description '''

    if source == 'twitter':
        description = item['tweet']['orig_text']
    elif source == 'facebook':
        if 'description' in item:
            description = item['description']
        else:
            description = ''
    else:
        description = item['items'][0]['snippet']['description']

    description = description.encode('utf-8').strip()
    sent = vaderSentiment(description)

    item.setdefault("sentiment", sent['compound'])

    return item
Example #27
    def on_data(self, data):
        fil = open("meu.txt", "a")
        stop = set(nltk.corpus.stopwords.words('english'))
        stop.update(['http', 'https', 'rt'])
        tweet = json.loads(data)
        if 'text' in tweet:
            texto = tweet['text'].encode('utf-8', 'ignore')
            self.numstop -= 1
            texto = self.user + '-' + texto
            self.producer.send(self.instanttopic, texto)
            saveTweet('pos', tweet, self.user)
            saveLocation('pos', tweet, self.user)

            vs = vaderSentiment(str(texto))
            contagemneg = vs['neg']
            contagempos = vs['pos']
            contagemspam = vs['neu']
            filo = open("vader.txt", 'a')
            if self.numstop == 0:
                return False
        return True
def avg_review_vaderSentiment(data):

    sentiment_totals = defaultdict(int)
    result_categories = set()
    output = []

    for each in data.items():

        results = each[1]['results']
        result_categories.add(results)
        reviews_list = each[1].get('reviews')

        if reviews_list:

            n_reviews = len(reviews_list)
            review_sentiments = [
                vaderSentiment(x[1].encode('utf8')) for x in reviews_list
            ]

            sentiment_totals[results + '_n_reviews'] += n_reviews
            sentiment_totals[results + '_pos_sentiment'] += sum(
                [i['pos'] for i in review_sentiments])
            sentiment_totals[results + '_neg_sentiment'] += sum(
                [i['neg'] for i in review_sentiments])
            sentiment_totals[results + '_neu_sentiment'] += sum(
                [i['neu'] for i in review_sentiments])
            sentiment_totals[results + '_compound_sentiment'] += sum(
                [i['compound'] for i in review_sentiments])

    for result in result_categories:
        t = (result, sentiment_totals[result + '_n_reviews'],
             float(sentiment_totals[result + '_pos_sentiment']) /
             float(sentiment_totals[result + '_n_reviews']),
             float(sentiment_totals[result + '_neg_sentiment']) /
             float(sentiment_totals[result + '_n_reviews']),
             float(sentiment_totals[result + '_compound_sentiment']) /
             float(sentiment_totals[result + '_n_reviews']))
        output.append(t)

    return output
Example #29
def baseline(tweets, emoji_maps):
    """Takes cleaned tweets. Returns list of baseline predictions"""
    assigned_emojis = list()

    for tweet in tweets:
        emoji_assigned = False

        for word in tweet.split():

            for emoji_map in emoji_maps:

                tags = emoji_map["tags"]

                for tag in tags.split():

                    if word == tag and emoji_assigned == False:
                        uni = emoji_map["unicode"]
                        print(tag)
                        print(uni)
                        assigned_emojis.append(uni.lower())
                        emoji_assigned = True

        if emoji_assigned == False:
            vs = vaderSentiment(tweet)
            positive = vs["pos"]
            negative = vs["neg"]
            neutral = vs["neu"]
            compound = vs["compound"]

            if positive > negative:
                uni = "\U0001f60a"  # U+1F60A smiling face; TODO: how were these emojis picked?
            else:
                uni = "\U0001f622"  # U+1F622 crying face

            assigned_emojis.append(uni)

            emoji_assigned = True

    return assigned_emojis
Example #30
	def process(self, tup):
		twt = tup.values[0].encode("utf-8","replace")
		# remove links
		twt = re.sub(r'https?:\S+', "", twt)

		# remove @ symbol
		twt = re.sub(r'@', "", twt)

		# split hashtags by camel case
		pattern = re.compile(r'#\S+')
		for match in re.findall(pattern, twt):
			split = match.replace("#","")
			split = re.sub(r"([A-Z])", " \\1", split)
			split = re.sub(r"([a-zA-Z])([0-9])", "\\1 \\2", split)
			split = split[1:] + "."
			twt = twt.replace(match,split)

		# run VADER over the normalized text
		vader = vaderSentiment(twt)['compound']

		# emit results for every matching queryId
		for queryId in tup.values[1]:
			storm.emit([queryId, vader, tup.values[0]])
Example #31
# Search twitter for @CocaCola's most recent tweets
q = 'from:CocaCola'
count=25
# 'tw' is assumed to be an authenticated twitter.Twitter API client created earlier.
tweets = tw.search.tweets(q=q, count=count, lang='en', result_type='recent')
texts=[]

print 'Sentiment Analysis for @CocaCola\'s Most Recent Tweets:'
print '--------------------------------------------------------------------------'

# Sentiment analysis
for status in tweets['statuses']:
    texts.append(status['text'])
    print 'Tweet:'
    print '\t' + status['text'].encode('utf-8')
    vs = vaderSentiment(status['text'].encode('utf-8'))
    print 'Sentiment analysis:'
    print '\t' + str(vs['compound'])
    print '--------------------------------------------------------------------------'

print '\nLexical Analysis for @CocaCola\'s Most Recent Tweets:'
print '--------------------------------------------------------------------------'

# Lexical analysis
for text in texts:
    print 'Tweet:'
    print '\t' + text.encode('utf-8')
    words = []
    for w in text.split():
        words.append(w)
    print 'Lexical diversity:'
Example #32
def analyze(path,output_path):

    k=open(path,'r')
    o=open(output_path,'w')
    for url in k:
        print url
        split_lines = url.split('/')
        date=extract_time_fox_news(url)
        if date!=0:
            result = []
            result.append(url.strip('\n').strip('\r'))
            if date>=20150101 and ('news' in split_lines or 'politics' in split_lines):
                total_paragraph = 0
                print date
                result.append(str(date))
                sentences = []
                page = requests.get(url.strip("\n"))


                soup = BeautifulSoup(page.content)
                total_score=0
                temp = soup.find_all('article', {"itemprop": "articleBody"})
                header = soup.find_all('em')
                first_p = soup.find_all('br')
                for x in header:
                    start = []
                    for y in x.contents:
                        if exclude_text not in str(y.string):
                            start.append(str(y.string))
                    texts= "".join(start)
                    if texts != " " and texts != "\n" and texts != "" and texts != "\r" and not "This material may not be published, broadcast, rewritten, or redistributed." in texts and texts != 'None':
                        print texts
                        sentences.append(texts)
                        zen = vaderSentiment(unicode(texts))
                        print zen
                        total_paragraph += 1
                        if zen['neu'] < float(1):
                            if zen['pos'] > zen['neg']:
                                total_score += zen['pos']
                            else:
                                total_score -= zen['neg']
                for x in first_p:
                    if str(x.next_sibling) != " " and exclude_text not in str(x.next_sibling):
                        texts= str(x.next_sibling)
                        if texts != " " and texts != "\n" and texts != "" and texts!="\r" and not "This material may not be published, broadcast, rewritten, or redistributed." in texts and texts!='None':
                            print texts
                            sentences.append(texts)
                            zen = vaderSentiment(unicode(texts))
                            print zen
                            total_paragraph += 1
                            if zen['neu'] < float(1):
                                if zen['pos'] > zen['neg']:
                                    total_score += zen['pos']
                                else:
                                    total_score -= zen['neg']

                for x in temp:
                    for y in x.contents:
                        texts=unicode(y.string).strip('\n')

                        if texts != " " and texts != "\n" and texts != "" and texts!="\r" and not "This material may not be published, broadcast, rewritten, or redistributed." in texts and texts!='None':
                            print texts
                            sentences.append(texts)
                            zen = vaderSentiment(unicode(texts))
                            print zen
                            total_paragraph += 1
                            if zen['neu'] < float(1):
                                if zen['pos'] > zen['neg']:
                                    total_score += zen['pos']
                                else:
                                    total_score -= zen['neg']

                rake_object = RAKE.Rake('stop-word-list.txt')
                keywords = rake_object.run("\n".join(sentences))
                trump_prob = 0
                hillary_prob = 0
                for x in keywords:
                    # print x

                    if 'trump' in x[0] or 'donald' in x[0]:
                        # print x
                        trump_prob += int(x[1])
                    if 'hillary' in x[0] or 'clinton' in x[0]:
                        # print x
                        hillary_prob += int(x[1])
                print url.strip("\n")
                print "Trump total ", trump_prob
                print "Hillary total ", hillary_prob
                if abs(trump_prob - hillary_prob) <= 2:
                    result.append("B")
                elif trump_prob > hillary_prob:
                    result.append("T")
                else:
                    result.append("H")
                # zen=vaderSentiment("\n".join(sentences))
                # if zen['neu']<0.5:
                #     if zen['pos']>zen['neg']:
                #         total_score=zen['pos']
                #     else:
                #         total_score=zen['neg']
                # total = neg + neu + pos
                # if total!=0:

                if total_paragraph != 0:
                    print "Total: ", total_score / total_paragraph
                    print "Total paragraph: ", total_paragraph
                    # print "Neg: ", neg
                    # print "Neu: ", neu
                    # print "Pos: ", pos
                    print "\n"
                    # result.append(str(neg))
                    # result.append(str(neu))
                    # result.append(str(pos))
                    result.append(str(total_score / total_paragraph))
                    # print result
                    o.writelines(", ".join(result) + "\n")
from datetime import datetime
from docopt import docopt
from twitterframework import TwitterAPI
from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
import win_unicode_console as win_unicode_console

if __name__ == '__main__':
    arguments = docopt(__doc__, version='Twitter Stream DB 1.0')

twitter_fw = TwitterAPI(arguments['<file>'])
twitter_api = twitter_fw.getAPI()

keylist = [key for key in arguments['<TwitterKeyword>']]
rows = []
win_unicode_console.enable()  # allows printing unicode to windows console

for tweet in twitter_api.GetStreamFilter(follow=None,
                                         track=keylist,
                                         locations=None,
                                         delimited=None,
                                         stall_warnings=None):
    t_lang = tweet['lang']
    t_text = tweet['text']

    vs = vaderSentiment(t_text)
    for i, searchtext in enumerate(keylist):
        if searchtext in t_text:
            print("'{0}','{1}','{2}','{3}','{4}','{5}'".format(
                str(datetime.now()), str(t_text), searchtext, str(t_lang),
                vs['pos'], vs['neg']))
Example #34
# 'sentences' is assumed to be a list of example sentences defined earlier in the original script.
tricky_sentences = [
    "Warren Beatty has never been so entertaining.",
    "I won't say that the movie is astounding and I wouldn't claim that \
   the movie is too banal either.",
    "I like to hate Michael Bay films, but I couldn't fault this one",
    "It's one thing to watch an Uwe Boll film, but another thing entirely \
   to pay for it", "The movie was too good",
    "This movie was actually neither that funny, nor super witty.",
    "This movie doesn't care about cleverness, wit or any other kind of \
   intelligent humor.",
    "Those who find ugly meanings in beautiful things are corrupt without \
   being charming.",
    "There are slow and repetitive parts, BUT it has just enough spice to \
   keep it interesting.",
    "The script is not fantastic, but the acting is decent and the cinematography \
   is EXCELLENT!",
    "Roger Dodger is one of the most compelling variations on this theme.",
    "Roger Dodger is one of the least compelling variations on this theme.",
    "Roger Dodger is at least compelling as a variation on the theme.",
    "they fall in love with the product", "but then it breaks",
    "usually around the time the 90 day warranty expires",
    "the twin towers collapsed today",
    "However, Mr. Carter solemnly argues, his client carried out the kidnapping \
   under orders and in the ''least offensive way possible.''"
]
sentences.extend(tricky_sentences)
#sid = SentimentIntensityAnalyzer()
for sentence in sentences:
    print sentence,
    ss = vaderSentiment(sentence)
    print "\t" + str(ss)
    print ""
Example #35
def get_sentiment(sentence):
	sentiments = max(vaderSentiment(sentence).iteritems(), key=operator.itemgetter(1))
	if sentiments[0] != "compound":
		return sentiments[0]
	else:
		return sentiments[1]
Example #36
def getSentiment(text):
    vs = vaderSentiment(text.encode('utf-8'))
    return vs['compound']