def main():
    """Print regex clean-ups and sentiment scores for every tweet in a file.

    Reads ``example_tweets.txt`` line by line.  For each line it prints the
    original text, nine cleaned variants produced by different regular
    expressions, the ``sentiment_score`` of the raw line and of the first
    cleaned variant, and the TextBlob polarity of the first cleaned variant.
    """
    filepath = 'example_tweets.txt'

    # (label, pattern) pairs in print order.  Raw strings are used so that
    # regex escapes such as \w are not parsed as (invalid) string escapes.
    variants = [
        ('cleaned message: ',
         r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"),
        ('cleaned message2: ', r"(@[A-Za-z0-9]+)"),
        ('cleaned message3: ', r"(@[A-Za-z0-9]+)"),
        ('cleaned message4: ', r"(\w+:\/\/\S+)"),
        ('cleaned message5: ', r"(#.+)"),
        ('cleaned message6: ', r"(#.*$)"),
        ('cleaned message7: ', r"(#.*)"),
        ('cleaned message8: ', r"(#\w+)"),
        ('cleaned message9: ',
         r"(#\w+)|(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"),
    ]

    with open(filepath) as fp:
        # Iterate the file object directly so that every line, including
        # the first one, is processed exactly once.
        for line in fp:
            print('original message: ' + line.strip())

            # Replace each match with a space, then collapse whitespace.
            cleaned = [(label, ' '.join(re.sub(pattern, " ", line).split()))
                       for label, pattern in variants]
            for label, text in cleaned:
                print(label + text)

            # Scores are computed on the first (most aggressive) variant.
            clean_message = cleaned[0][1]
            print('original sentiment score: ' + str(sentiment_score(line)))
            print('cleaned sentiment score: ' +
                  str(sentiment_score(clean_message)))
            analysis = TextBlob(clean_message)
            print('cleaned TextBlob sentiment score: ' +
                  str(analysis.sentiment.polarity))
            print('\n')
Ejemplo n.º 2
0
def count_senti(row, counts):
    """Add one tweet's sentiment to the running per-city totals.

    Args:
        row: Object with a ``value`` attribute that is indexable:
            ``value[0]`` is passed to ``sentiment_score``, ``value[2]``
            holds ``['boundingBox']['coordinates'][0][0]`` as a
            (longitude, latitude) pair, and ``value[3]`` is the language.
        counts: Dict mapping a city to a dict with the keys ``totalTweet``,
            ``totalSenti``, ``totalPos`` and ``totalNeg``.  Updated in place.

    Returns:
        ``counts``.  It is returned unchanged when the row is not English,
        when ``get_geo`` returns an empty city, or when the row cannot be
        processed.
    """
    try:
        value = row.value
        coordinates = value[2]['boundingBox']['coordinates'][0][0]
        longitude = coordinates[0]
        latitude = coordinates[1]
        language = value[3]
        if language != 'en':
            return counts
        city = get_geo(latitude, longitude)
        if city == '':
            return counts
        score = sentiment_score(value[0])
    except Exception:
        # Best effort: a malformed row is skipped.  ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit through.
        return counts

    data_city = counts.get(city, {})
    data_city['totalTweet'] = data_city.get('totalTweet', 0) + 1
    data_city['totalSenti'] = data_city.get('totalSenti', 0) + score

    # Make sure both counters exist, then bump the one this tweet falls in.
    # A score above 0.1 counts as positive; everything else as negative.
    data_city.setdefault('totalPos', 0)
    data_city.setdefault('totalNeg', 0)
    bucket = 'totalPos' if score > 0.1 else 'totalNeg'
    data_city[bucket] += 1

    counts[city] = data_city
    return counts
Ejemplo n.º 3
0
def preprocessTweets(data):
  """Turn raw tweet records into lists with decoded text and a score.

  For each record the result holds, in order: the UTF-8 decoded first
  field, the second field, the length of the undecoded first field, the
  third field, the last field, and ``sentiment_score`` of the decoded text.
  """
  processed = []
  for record in data:
    raw_text = record[0]
    text = raw_text.decode("utf-8")
    processed.append([
        text,
        record[1],
        len(raw_text),  # length of the undecoded value
        record[2],
        record[-1],
        sentiment_score(text),
    ])
  return processed
Ejemplo n.º 4
0
def preprocessTweets(data):
    """Build one feature list per raw tweet record.

    Each output list is ``[text, record[1], len(record[0]), record[2],
    record[-1], score]`` where ``text`` is ``record[0]`` decoded as UTF-8
    and ``score`` is ``sentiment_score(text)``.
    """

    def _to_row(record):
        # Decode once and reuse the text for both the output and the score.
        text = record[0].decode("utf-8")
        return [text, record[1], len(record[0]), record[2], record[-1],
                sentiment_score(text)]

    return [_to_row(record) for record in data]
Ejemplo n.º 5
0
    def post(self):
        """Render the sentiment of a submitted tweet or hashtag.

        Reads the ``tweet`` and ``hashtag`` request arguments.  When a tweet
        is given, its ``sentiment_score`` is rendered as ``tweet_senti``.
        Otherwise, when a hashtag is given, up to 100 search results are
        scored and their mean is rendered as ``hashtag_senti``.  Any value
        that was not computed is rendered as ``"0"``.
        """
        tweet = self.get_argument("tweet", default="")
        hashtag = self.get_argument("hashtag", default="")
        t = tornado.template.Template(html)

        tweet_senti = "0"
        hashtag_senti = "0"

        if tweet:
            tweet_senti = str(sentiment_score(tweet))
        elif hashtag:
            texts = [status.text for status in api.search(hashtag, count=100)]
            # Skip scoring when the search found nothing: the mean of an
            # empty list is NaN, so keep the "0" default instead.
            if texts:
                scores = sentiment_scores_of_sents(texts)
                for score, text in zip(scores, texts):
                    # Single-argument print() behaves the same on Python 2
                    # and Python 3.
                    print("%s %s" % (score, text.encode('utf8')))
                hashtag_senti = str(np.mean(scores))

        self.write(t.generate(tweet_senti=tweet_senti,
                              hashtag_senti=hashtag_senti))
Ejemplo n.º 6
0
    def post(self):
        """Handle the form POST and write the rendered sentiment page.

        Uses the ``tweet`` request argument when present, scoring it with
        ``sentiment_score``.  Otherwise, when ``hashtag`` is present, it
        searches for up to 100 matching tweets, scores them with
        ``sentiment_scores_of_sents`` and reports the mean.  Values that are
        not computed are rendered as ``"0"``.
        """
        tweet = self.get_argument("tweet", default="")
        hashtag = self.get_argument("hashtag", default="")
        t = tornado.template.Template(html)

        if tweet:
            score = sentiment_score(tweet)
            self.write(t.generate(tweet_senti=str(score), hashtag_senti="0"))
            return

        if not hashtag:
            self.write(t.generate(tweet_senti="0", hashtag_senti="0"))
            return

        found = [status.text for status in api.search(hashtag, count=100)]
        if not found:
            # No search results: np.mean([]) would be NaN, so report "0".
            self.write(t.generate(tweet_senti="0", hashtag_senti="0"))
            return

        scores = sentiment_scores_of_sents(found)
        for score, text in zip(scores, found):
            # Single-argument print() works on both Python 2 and Python 3.
            print("%s %s" % (score, text.encode('utf8')))

        mean_score = np.mean(scores)
        self.write(
            t.generate(tweet_senti="0", hashtag_senti=str(mean_score)))
Ejemplo n.º 7
0
from sentiment import sentiment_score

__author__ = 'ravi'

# Quick manual check: print the score of one fixed sample phrase.
print(sentiment_score("go die"))


Ejemplo n.º 8
0
# Compute an individual sentiment score for every line of the input file
# and save the [text, score] pairs as JSON.
from sentiment import sentiment_score
import json

data = []

# The context manager closes the input file once all lines are scored.
with open('z_test_data.txt') as infile:
    for line in infile:
        # Each line is scored as read (including its trailing newline).
        data.append([line, sentiment_score(line)])

with open('z_saved_data.txt', 'w') as outfile:
    json.dump(data, outfile)

Ejemplo n.º 9
0
    def on_status(self, status):
        """Forward a qualifying tweet from the live stream to Kafka.

        This method is called whenever new data arrives from the live
        stream.  A tweet is forwarded only when it mentions at least one
        user, has a ``place``, does not start with ``RT``, the first
        mentioned user has a ``profile_location``, and both locations can be
        geocoded.  English tweets are labelled ``Pos`` when their
        ``sentiment_score`` exceeds 0.5 and ``Neg`` otherwise; all other
        languages are labelled ``Pos``.

        Returns:
            ``True`` when the tweet was forwarded or skipped, ``False`` when
            an exception occurred while processing it.
        """
        try:
            # Round-trip through JSON to work on a plain dict copy.
            tweet = json.loads(json.dumps(status._json).encode('utf-8'))

            mentions = tweet["entities"]['user_mentions']
            if not mentions:
                return True
            if tweet['place'] is None:
                return True
            if tweet['text'].startswith('RT'):
                return True

            # Look up the first mentioned user to find the target location.
            to_user_id = mentions[0]["id_str"]
            to_user = json.loads(
                json.dumps(api.get_user(to_user_id)._json).encode('utf-8'))
            if to_user["profile_location"] is None:
                return True

            to_name = to_user["profile_location"]['name']
            from_name = tweet['place']['full_name']
            to_coords = self.geolocator.geocode(to_name.encode('utf-8'))
            from_coords = self.geolocator.geocode(from_name.encode('utf-8'))

            if tweet["lang"].startswith("en"):
                sentscore = sentiment_score(tweet['text'].encode('utf-8'))
                sentiment = 'Pos' if sentscore > 0.5 else 'Neg'
            else:
                sentiment = 'Pos'

            if to_coords is None or from_coords is None:
                return True

            # Serialise with json.dumps so quotes, backslashes and newlines
            # in the tweet text are escaped and the message is valid JSON.
            # Coordinates stay strings, as in the message format.
            tweetJSON = json.dumps({
                "text": tweet['text'],
                "language": tweet['lang'],
                "sentiment": sentiment,
                "fromLocation": from_name,
                "fromLocationLat": str(from_coords.latitude),
                "fromLocationLong": str(from_coords.longitude),
                "toLocation": to_name,
                "toLocationLat": str(to_coords.latitude),
                "toLocationLong": str(to_coords.longitude),
            })
            print(tweetJSON)
            self.producer.send_messages('twitterstream', tweetJSON)

        except Exception as e:
            # Report the problem and signal failure to the caller.
            print(e)
            return False

        return True
Ejemplo n.º 10
0
    # Load the JSON-lines output saved for this keyword.  The context
    # manager closes the file as soon as the lines have been read.
    with open(OUTPUT_FOLDER_NAME + '/' + keyword['word'] + '.output', 'r') as file1:
        lines = file1.readlines()
    # 'created_at' of the first record.
    sttime = json.loads(lines[0])['created_at']
    #print(keyword['word'])
    all_likes = 0
    all_retweets = 0
    score = 0
    num = 0
    for line in lines:
        data = json.loads(line)
        msg = data['tweet']
        likes = data['likes_count']
        retweets = data['retweets_count']
        # Engagement weights: a like counts 0.5, a retweet counts 1.0.
        like_score = likes * 0.5
        rewtweet_score = retweets * 1.0
        sentiment = sentiment_score(msg)
        # Only tweets scoring outside the 0.3..0.7 band contribute.
        if (sentiment < 0.3 or sentiment > 0.7):
            if multiplier == -1:
                # Flip the score for keywords with a -1 multiplier.
                sentiment = 1 - sentiment
            score += sentiment + like_score + rewtweet_score
            num += abs((1 + like_score + rewtweet_score))
        #Save featured tweets
        if len(msg) <= featured_char_limit and (not ("@" in msg)) and (not ("https://" in msg)):
            featured.append(data)
        all_likes += likes
        all_retweets += retweets
    # NOTE(review): num stays 0 when no tweet falls outside the 0.3..0.7
    # band, which makes this division raise ZeroDivisionError -- confirm
    # the intended fallback value.
    score = score / num
    averages.append(score)
    # print("likes: ", all_likes, ", retweets", all_retweets)

Ejemplo n.º 11
0
        # Load the item file and append a "<review>_score" column for every
        # review column that is present in it.
        df = pd.read_csv(item_path)
        item_columns = df.columns
        for review in review_list:
            if review not in item_columns:
                continue
            str_list = df[review]
            score_list = []
            count = 0
            total_count = len(str_list)
            print(total_count)
            # enumerate() has no length, so pass the total explicitly to
            # let the progress bar show completion.
            tqdm_iter = tqdm(enumerate(str_list), total=total_count)
            for idx, raw in tqdm_iter:
                try:
                    str_item = unicode(raw.encode("utf-8"))
                    count += 1
                    str_item_score_tmp = sentiment_score(str_item)
                    score_list.append(str_item_score_tmp)
                except Exception:
                    # Values that cannot be scored are logged and kept as
                    # their string form so the column stays aligned with
                    # the rows.  ``Exception`` (not a bare ``except``) lets
                    # KeyboardInterrupt through.
                    log_error(f, idx, str(raw))
                    count += 1
                    score_list.append(str(raw))

            # Append the scores as a new last column and name it.
            score_list = pd.DataFrame(score_list)
            df = pd.concat([df, score_list], axis=1)
            ori_columns = list(df.columns)
            ori_columns[-1] = review + u'_score'
            df.columns = pd.Series(ori_columns)
        save_path = result_path + item
        df.to_csv(save_path)
        print(df)
Ejemplo n.º 12
0
def main():
    """Print the sentiment score of one fixed sample sentence."""
    sample = u"I love you"
    print(sentiment_score(sample))
Ejemplo n.º 13
0
    def delete_document(self, dbs_name, document_id, rev):
        """Delete a document revision and return the decoded JSON reply.

        Sends an HTTP DELETE to port 5984 on ``self.ip_address`` for
        ``document_id`` in database ``dbs_name``, with ``rev`` as a query
        parameter, authenticating with ``self.auth``.
        """
        url_fields = {
            'ip_address': self.ip_address,
            'dbs_name': dbs_name,
            'document_id': document_id,
            'rev': rev,
        }
        url_template = 'http://{ip_address}:5984/{dbs_name}/{document_id}?rev={rev}'
        target = url_template.format(**url_fields)

        return requests.delete(target, auth=self.auth).json()


if __name__ == '__main__':
    # Score every tweet in tinyTwitter.json (with t.co links removed) and
    # store the result in the 'demo' database.
    my_couchdb = couchDb_utils('admin', 'password', 'localhost')
    # Other calls, kept for reference:
    # res = my_couchdb.insert_document('demo', {'_id': 'second_record', 'init_balance': 1500})
    # res = my_couchdb.get_document('demo', 'second_record')
    # res = my_couchdb.delete_document('demo', 'second_record', "1-9528dce32655253d363029732a718a23")
    # print res
    with open('tinyTwitter.json', 'r') as f:
        tiny_twitter = json.load(f)
        for row in tiny_twitter['rows']:
            text = row['doc']['text']
            # Drop each shortened link found in the original text.
            short_links = re.findall(r'https?://t\.co/.{10}', text,
                                     re.MULTILINE)
            for short_link in short_links:
                text = text.replace(short_link, '')

            my_couchdb.insert_document(
                'demo',
                {'positiveness': sentiment_score(text), 'tweet': text})