Exemple #1
0
def activityHome(request):
    """
    Django view for the tweet-analysis home page.

    POST: validate the hashtag form and redirect back to this page with
    ``?hashtag=<value>``.
    GET with ``?hashtag=...``: run the full pipeline — load the pickled
    classifiers, stream a handful of live tweets, classify each one, and
    render wordcloud / pie-chart images into the static directory — then
    render ``home.html``.
    """
    form = teamForm(request.POST or None)

    if request.method == 'POST':
        if form.is_valid():
            hashtag = request.POST.get('hashtag', '')
            return redirect('/tweetscan/home?hashtag=' + str(hashtag))
        # Bug fix: the original fell through and returned None on an invalid
        # POST (an HTTP 500 in Django); re-render the form with its errors.
        return render(request, 'home.html', {'form': form})

    hashtag_get = request.GET.get('hashtag')
    if hashtag_get:
        _run_pipeline(hashtag_get)
        args = {'form': form, 'hashtag': hashtag_get}
    else:
        args = {'form': form}
    return render(request, 'home.html', args)


# Absolute paths kept from the original code.
# TODO(review): move these into Django settings so the app is portable.
_BASE = "C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan"
_PICKLE_DIR = _BASE + "/pickled_algos"
_OUT_DIR = _BASE + "/outputs"
_STATIC_DIR = _BASE + "/static"

# SECURITY NOTE(review): API credentials belong in environment variables or
# settings, never in source control. Kept in place only to preserve behavior.
_CKEY = "lookB9U9DovzE29uvPBm9OV03"
_CSECRET = "WJT4BPbyvWdEba3TfhRTBZZAw8JgHnj9bJGre4XOHvm0BOFs6o"
_ATOKEN = "3255426194-tv415MxWQSZlB4kxq4SQBQXhqNMnF54kwmxTfy5"
_ASECRET = "MPHTzP2APFeu1o3mYMCLzt5EQFDo1oDNhqeoe5rEUqiRC"


def _run_pipeline(hashtag):
    """Full analysis for one hashtag: classify a live tweet stream, then
    render the wordcloud and pie-chart images."""
    sentiment = _build_sentiment()
    _stream_tweets(hashtag, sentiment)
    _render_plots()


def _load_pickle(name):
    """Load one pickled object from the algorithm directory.

    Bug fix: the original left several of these file handles open until GC.
    """
    import pickle
    with open(_PICKLE_DIR + "/" + name, "rb") as fh:
        return pickle.load(fh)


def _build_sentiment():
    """Build and return a ``sentiment(text) -> (label, confidence)`` callable
    backed by an ensemble vote over the pickled classifiers."""
    from statistics import mode
    from nltk.classify import ClassifierI
    from nltk.tokenize import word_tokenize

    class VoteClassifier(ClassifierI):
        """Majority vote over several pre-trained classifiers."""

        def __init__(self, *classifiers):
            self._classifiers = classifiers

        def classify(self, features):
            votes = [c.classify(features) for c in self._classifiers]
            return mode(votes)

        def confidence(self, features):
            # Fraction of classifiers that agree with the winning label.
            votes = [c.classify(features) for c in self._classifiers]
            return votes.count(mode(votes)) / len(votes)

    word_features = _load_pickle("word_features5k.pickle")

    voted = VoteClassifier(
        _load_pickle("originalnaivebayes5k.pickle"),
        _load_pickle("MNB_classifier5k.pickle"),
        _load_pickle("BernoulliNB_classifier5k.pickle"),
        _load_pickle("LogisticRegression_classifier5k.pickle"),
    )
    # NOTE: the original also loaded documents.pickle, featuresets.pickle and
    # the SGDC classifier, then shuffled and split a training set — all of it
    # unused on the request path. That dead per-request work is dropped.

    def find_features(document):
        """Bag-of-words presence features for one tweet."""
        words = set(word_tokenize(document))
        return {w: (w in words) for w in word_features}

    def sentiment(text):
        feats = find_features(text)
        return voted.classify(feats), voted.confidence(feats)

    return sentiment


def _stream_tweets(hashtag, sentiment, limit=5):
    """Stream up to ``limit`` English tweets matching ``hashtag``, writing the
    text and label of high-confidence (>= 80%) classifications to the output
    files consumed by :func:`_render_plots`."""
    import json
    from tweepy import Stream, OAuthHandler
    from tweepy.streaming import StreamListener

    text_path = _OUT_DIR + "/twitter-text-out.txt"
    label_path = _OUT_DIR + "/twitter-out.txt"
    # Truncate the previous run's output before streaming.
    open(label_path, 'w').close()
    open(text_path, 'w').close()

    class _Listener(StreamListener):
        def __init__(self):
            super().__init__()
            self.counter = 0

        def on_data(self, data):
            tweet = json.loads(data)["text"]
            label, confidence = sentiment(tweet)
            self.counter += 1
            if confidence * 100 >= 80:
                with open(text_path, "a") as out:
                    out.write(tweet + '\n')
                with open(label_path, "a") as out:
                    out.write(label + '\n')
            if self.counter >= limit:
                stream.disconnect()
            return True

        # Bug fix: in the original this was (unreachably) defined inside the
        # body of on_data, so stream errors were never handled.
        def on_error(self, status):
            print(status)

    auth = OAuthHandler(_CKEY, _CSECRET)
    auth.set_access_token(_ATOKEN, _ASECRET)
    stream = Stream(auth, _Listener())
    stream.filter(track=[hashtag], languages=["en"])


def _render_plots():
    """Render the wordcloud and sentiment pie chart into the static dir.

    NOTE: the original also built a FuncAnimation line chart and saved its
    empty figure to wordcloud.jpg, which the real wordcloud then immediately
    overwrote — that dead code is removed.
    """
    import matplotlib.pyplot as plt
    from matplotlib import style
    from wordcloud import WordCloud

    plt.rcdefaults()
    style.use("ggplot")

    # Word cloud of the collected high-confidence tweets.
    # Bug fix: the original never closed this file handle.
    with open(_OUT_DIR + "/twitter-text-out.txt", "r") as fh:
        text = fh.read()
    cloud = WordCloud(width=800, height=500, max_font_size=100,
                      random_state=21).generate(text)
    plt.figure(figsize=(10, 7))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis('off')
    plt.title('WordCloud')
    plt.tight_layout()
    plt.savefig(_STATIC_DIR + "/wordcloud.jpg")

    # Pie chart of positive vs negative labels.
    with open(_OUT_DIR + "/twitter-out.txt", "r") as fh:
        lines = fh.read().split('\n')
    pos = lines.count('pos')
    neg = lines.count('neg')
    fig1, ax1 = plt.subplots()
    ax1.pie([pos, neg], explode=[0.1, 0], labels=['positive', 'negative'],
            colors=['#ff9999', '#66b3ff'], autopct='%1.1f%%',
            shadow=True, startangle=90)
    ax1.axis('equal')  # keep the pie circular
    plt.title('Sentiment Pie Chart')
    plt.tight_layout()
    plt.savefig(_STATIC_DIR + "/pie.jpg")
				return True


			except BaseException e:
				print 'failed ondata,', str(e)
				time.sleep(5)
				pass

		saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
		saveFile.write(u'[\n')
		saveFile.write(','.join(self.tweet_data))
		saveFile.write(u'\n]')
		saveFile.close()
		exit()

	def on_error(self, status):

		print statuses

saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
saveFile.write(u'[\n')
saveFile.write(','.join(self.tweet_data))
saveFile.write(u'\n]')
saveFile.close()

auth = OAuthHandler(ckey, consumer_secret) #OAuth object
auth.set_access_token(access_token_key, access_token_secret)


twitterStream = Stream(auth, listener(start_time, time_limit=20)) #initialize Stream object with a time out limit
twitterStream.filter(track=keyword_list, languages=['en'])  #call the filter method to run the Stream Object
Exemple #3
0
        super(listener, self).__init__()

        #initialize number of tweets to be obtained
        self.counter = 0
        self.limit = 50000

    def on_data(self, data):
        """Append each raw tweet JSON blob to cars.json until ``self.limit``
        tweets have been collected; returning False stops the stream."""
        print(data)
        try:
            with open('cars.json', 'a') as f:
                f.write(data)
        except BaseException as e:
            # Bug fix: the original printed the constant 'danger' and
            # discarded the exception, making write failures undiagnosable.
            print('Error on_data: %s' % str(e))
        # check if count is reached
        if self.counter < self.limit:
            self.counter = self.counter + 1
            return True
        else:
            return False

    def on_error(self, status):
        # Report stream errors by printing the status code Twitter returned.
        print(status)


# Authenticate against the Twitter API with the app and user credentials.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Attach the listener defined above to a live stream.
twitterStream = Stream(auth, listener())
# Track the keyword 'cars'; the listener's on_data return value (False once
# self.limit tweets are stored) ends the stream.
twitterStream.filter(track=['cars'])
Exemple #4
0
def sendData(c_socket, keywords=('soccer',)):
    """Start a Twitter stream that forwards matching tweets to ``c_socket``.

    :param c_socket: socket handed to ``TweetsListener`` for forwarding.
    :param keywords: phrases to track; the default preserves the original
                     hard-coded ``['soccer']`` (generalized to a parameter).
    """
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    twitter_stream = Stream(auth, TweetsListener(c_socket))
    twitter_stream.filter(track=list(keywords))
Exemple #5
0
 def stream_tweets(self, tweetsFilename, hash_tag_list):
     """Open an authenticated stream tracking ``hash_tag_list`` (English
     only) and hand events to a LiveTweetStreaming listener that records
     them in ``tweetsFilename``."""
     stream_listener = LiveTweetStreaming(tweetsFilename)
     credentials = OAuthHandler(consumer_key, consumer_secret)
     credentials.set_access_token(access_token, access_secret)
     tweet_stream = Stream(credentials, stream_listener)
     tweet_stream.filter(languages=['en'], track=hash_tag_list)
            tweet_text = tweet_text.replace('\n', ' ').replace("\t", " ")
            if "RT" not in tweet_text:
                with codecs.open("tweets-daily/" +
                                 datetime.now().strftime("%Y-%m-%d") + '.txt',
                                 'a',
                                 encoding="utf-8") as f:
                    self.counter += 1
                    str_out = json_data["id_str"] + "\t"
                    tags = ""
                    for hashtag in json_data["entities"]["hashtags"]:
                        tags = tags + "," + hashtag["text"]
                    if tags != "":
                        tags = tags[1:]
                    str_out = str_out + "\t" + tags + "\t" + tweet_text + "\r\n"
                    f.write(str_out)
                    print(str(self.counter) + " : \t" + str_out)
            return True

        except BaseException as e:
            print("Error on_data: %s" % str(e))
            return True

    def on_error(self, status):
        print(status)
        return True


twitter_stream = Stream(auth, TweetListener())
twitter_stream.filter(languages=['fa'], track=['با', 'از', 'به', 'در'])
Exemple #7
0
def main():
    """Authenticate with Twitter and stream tweets matching the hashtags
    collected in GameCategory.py."""
    credentials = tweepy.OAuthHandler(consumer_key, consumer_secret)
    credentials.set_access_token(access_token, access_token_secret)
    live_stream = Stream(credentials, MyListener())
    # hashtag list comes from GameCategory.py
    live_stream.filter(track=all_hashtags)
Exemple #8
0
    # check for S3 configuration, enable if present
    s3_bucketname = config['io'].get('s3_bucketname', None)

    if s3_bucketname:
        twitter_listener = TwitterListener(outfile, target_count,
                                           s3_bucketname)
    else:
        print("[!] No S3 bucket name found, running in local archive mode.")
        twitter_listener = TwitterListener(outfile, target_count)

    auth = OAuthHandler(config['twitter'].get('consumer_key'),
                        config['twitter'].get('consumer_secret'))
    auth.set_access_token(config['twitter'].get('access_token'),
                          config['twitter'].get('access_token_secret'))

    stream = Stream(auth, twitter_listener)

    backoff_in_seconds = 1
    while backoff_in_seconds < 65:
        try:
            print(" * Tracker String: %s" % tracker_string)
            stream.filter(track=[tracker_string])
        except KeyboardInterrupt:
            print("Shutting down listener...")
            twitter_listener.close_file()
            print("Clean shutdown successful!")
            exit(0)
        except (UrlLibProtocolError, RequestsConnectionError):
            print("Connection reset by host, retrying in %d seconds." %
                  twitter_listener.backoff_in_seconds)
            time.sleep(twitter_listener.backoff_in_seconds)
Exemple #9
0
# -*- coding: utf-8 -*-
"""
best way to install tweepy :
python3 -m pip install tweepy
Author: Mozzie yao
  
        
"""
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
import re
# Vars storing user Ids to access Twitter API
access_token = '3004457773-mcwtl44ExjnrYQ2mEDdnZQ4ukqRqIvfH7986wqc'
access_token_secret = 'KNAkgxRLaxa8IP0wSIZfGF3cEkhVtpig8L6vzgf2ZOX3g'
consumer_key = 'x7ACjFwB00CSOD71JIRM3b4xV'
consumer_secret = 'mBeHhpaqhbWMpKpfQjlbPoX1ImCX7GGZ7eGqU9advQYMHkJNfp'
# Query item
tracklist = []
tweet_count = 0
n_tweets = 1000
'''
this part is usded for testing print display only
class StdOutListener(StreamListener):
    def on_data(self,data):
        global tweet_count
        global n_tweets
        global stream
        
                            self.count_Android)
                    out += "\n"
                    f.write(out)
                    print out
                return True
        except BaseException as e:
            print("Error on_data: %s" % str(e))
            time.sleep(5)
        return True

    def on_error(self, status):
        print(status)
        return True


# Twitter dev credentials
consumer_key = 'your_consumer_key'
consumer_secret = 'your_consumer_secret'
access_token = 'your-access_token'
access_secret = 'access_secret'

if __name__ == '__main__':
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    print "-Start: " + str(datetime.datetime.now().time())

    search = u"#AppleTax"
    twitter_stream = Stream(auth, MyListener(search))
    twitter_stream.filter(track=[search])
Exemple #11
0
            self.__rate_limits_per_user[screenName] = existing_rate + 1

            # check rate limit, and reply accordingly
            if (existing_rate == self.__max_per_user):
                replyText = '@' + screenName + ' lo siento, pero solo 2 respuestas por hora 🤖👮'
                twitterApi.update_status(status=replyText,
                                         in_reply_to_status_id=tweetId)
                print(
                    "replied to this user, saying he/she should wait for an hour."
                )
                return
            elif (existing_rate > self.__max_per_user):
                print(
                    "*already* replied to this user, he/she should wait for an hour, ignoring."
                )
                return

            replyText = '@' + screenName + " " + self.response("")
            twitterApi.update_status(status=replyText,
                                     in_reply_to_status_id=tweetId)
            return

    def on_error(self, status):
        print(status)


if __name__ == '__main__':
    streamListener = ReplyToTweet()
    twitterStream = Stream(auth, streamListener)
    twitterStream.filter(track=['@' + account_screen_name])
            except AttributeError as e:
                print('AttributeError was returned, stupid bug')
                print(e)
            except tweepy.TweepError as e:
                print('Below is the printed exception')
                print(e)
                if '401' in e:
                    # not sure if this will even work
                    print('Below is the response that came in')
                    print(e)
                    time.sleep(60)
                else:
                    # raise an exception if another status code was returned,we don't like other kinds
                    time.sleep(60)
            except BaseException as e:
                print('failed ondata,', str(e))
                time.sleep(5)
        exit()

    def on_error(self, status):
        print(status)


# Instance
auth = OAuthHandler(ckey, consumer_secret)  # Consumer keys
auth.set_access_token(access_token_key, access_token_secret)  # Secret Keys
# initialize Stream object with a time out limit
twitterStream = Stream(auth, listener(start_time, time_limit=82800))
# bounding box filter for Washington
twitterStream.filter(locations=[-124.84, 45.54, -116.92, 49.0])
 def start_stream(self):
     """Begin streaming English tweets for ``self.keyword`` without
     blocking the caller (``is_async=True``)."""
     self.twitterStream = Stream(auth, listener(self.keyword))
     self.twitterStream.filter(languages=['en'],
                               track=[self.keyword],
                               is_async=True)
Exemple #14
0
 def stream_tweets(self, filters):
     """Stream tweets whose geo-coordinates fall inside ``filters``."""
     credentials = self.twitter_authenticator.authenticate_apps()
     tweet_stream = Stream(credentials, TwitterListener())
     tweet_stream.filter(locations=filters)
Exemple #15
0
                print("Tweets collected! Creating points, please wait... ")
                return False
        except KeyError:
            pass
            return True
        except BaseException as e:
            print("Error on_data: {}".format(e))
        return True

    def on_error(self, status):
        print(status)
        return True


# Initiate stream of tweets using input as search parameters
twitter_stream = Stream(
    auth, MyListener(num_tweets_to_grab=how_many, output_file=output))
twitter_stream.filter(track=[search_key])

# Convert and save JSON file with tweets to a geoJSON format to be converted into a shapefile
with open(output, "r", newline="\r\n") as f:
    geo_data = {"type": "FeatureCollection", "features": []}
    for line in f:
        tweet = json.loads(line)
        if tweet["coordinates"]:
            geo_json_feature = {
                "type": "Feature",
                "geometry": tweet["coordinates"],
                "properties": {
                    "text": tweet["text"],
                    "created_at": tweet["created_at"]
                }
Exemple #16
0
            else:
                negative = negative + sen.sentiment.polarity
        compound = compound + senti
        print(count)
        print(tweet.strip())
        print(senti)
        print(t)
        print(str(positive) + ' ' + str(negative) + ' ' + str(compound))

        plt.axis([0, 70, -20, 20])
        plt.xlabel('Time')
        plt.ylabel('Sentiment')
        plt.plot([t], [positive], 'go', [t], [negative], 'ro', [t], [compound],
                 'bo')
        plt.show()
        plt.pause(0.0001)
        if count == 200:
            return False
        else:
            return True

    def on_error(self, status):
        print(status)


auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

twitterStream = Stream(auth, listener(count))
twitterStream.filter(track=["obama"])
Exemple #17
0
def bot(request, name):
    """
    Render ``stock/bot.html`` for a search term.

    POST: the term comes from the 'reply' form field; any other method uses
    the URL ``name``. The two branches previously duplicated ~35 lines
    verbatim; they now share :func:`_bot_reply`. The unused ``global sign``
    declarations were dropped.
    """
    print("Hi")
    if request.method == 'POST':
        reply = request.POST.get('reply', '')
    else:
        reply = name
    return _bot_reply(request, reply)


def _bot_reply(request, reply):
    """Stream tweets matching ``reply``, build their oEmbed HTML snippets,
    summarize sentiment, and render the bot page."""
    global list_of_tweets, i, pos_count
    print(reply)
    twitterStream = Stream(auth, listener())
    twitterStream.filter(track=[reply])
    print(list_of_tweets)

    block = []
    for tweet_id in list_of_tweets:
        resp = requests.get(
            url="https://api.twitter.com/1.1/statuses/oembed.json",
            params=dict(id=tweet_id))
        data = resp.json()
        try:
            embed = data["html"].replace(
                "<script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>",
                "")
            block.append(embed)
        except Exception:
            # oEmbed responses for deleted/protected tweets lack "html";
            # skip them (same best-effort intent as the original).
            pass

    # Thresholds kept from the original (out of the streamed tweet sample).
    if pos_count > 5:
        sentiment = "The general sentiment of people is positive"
    elif pos_count > 3:
        # Bug fix: corrected the user-facing typo "nuetral".
        sentiment = "The general sentiment of people is neutral"
    else:
        sentiment = "The general sentiment of people is negative"

    # Reset the module-level accumulators for the next request.
    list_of_tweets = []
    i = 0
    pos_count = 0
    return render(request, 'stock/bot.html', {
        'reply': reply,
        'flag': True,
        'block': block,
        'sentiment': sentiment,
    })
Exemple #18
0
            except KeyError:
                # traceback.print_exc()
                if 'limit' in data:
                    time.sleep(1)

            self.tweets_gone += 1
            if self.tweets_gone % 1000 == 0:
                print('{} tweets filtered'.format(self.tweets_gone))

        else:
            self.outFile.close()
            return False

    def on_error(self, status):
        print(status)


if __name__ == '__main__':
    auth = OAuthHandler(consumerKey, consumerSecret)
    auth.set_access_token(accessToken, secretAccessToken)
    print('set auth and access')

    stream = Stream(
        auth,
        MyListener(time_limit=15 * 60,
                   file_name='data/preprocessed_michigan_tweets.json'))
    keywords = ['corona', 'quarantine', 'covid']
    stream.filter(track=keywords)

    print('Finished')
class TweetFeels(object):
    """
    The controller.

    :param credentials: A list of your 4 credential components.
    :param tracking: A list of keywords to track.
    :param db: A sqlite database to store data. Will be created if it doesn't
               already exist. Will append if it exists.
    :ivar lang: A list of languages to include in tweet gathering.
    :ivar buffer_limit: When the number of tweets in the buffer hits this limit
                        all tweets in the buffer gets flushed to the database.
    :ivar connected: Tells you if TweetFeels is connected and listening to
                     Twitter.
    :ivar sentiment: The real-time sentiment score.
    :ivar binsize: The fixed observation interval between new sentiment
                   calculations. (default = 60 seconds)
    :ivar factor: The fall-off factor used in real-time sentiment calculation.
                  (default = 0.99)
    """
    _db_factory = (lambda db: TweetData(db))
    _listener_factory = (lambda ctrl: TweetListener(ctrl))
    _stream_factory = (lambda auth, listener: Stream(auth, listener))

    def __init__(self, credentials, tracking=[], db='feels.sqlite'):
        self._feels = TweetFeels._db_factory(db)
        _auth = OAuthHandler(credentials[0], credentials[1])
        _auth.set_access_token(credentials[2], credentials[3])
        self._listener = TweetFeels._listener_factory(self)
        self._stream = TweetFeels._stream_factory(_auth, self._listener)
        self.tracking = tracking
        self.lang = ['en']
        self._sentiment = Sentiment(0, 0, 0, 0)
        self._filter_level = 'low'
        self._bin_size = timedelta(seconds=60)
        self._latest_calc = self._feels.start
        self._tweet_buffer = deque()
        self.buffer_limit = 50
        self._factor = 0.99

    @property
    def binsize(self):
        return self._bin_size

    @binsize.setter
    def binsize(self, value):
        assert (isinstance(value, timedelta))
        if value != self._bin_size:
            self._latest_calc = self._feels.start
        self._bin_size = value

    @property
    def factor(self):
        return self._factor

    @factor.setter
    def factor(self, value):
        assert (value <= 1 and value > 0)
        self._latest_calc = self._feels.start
        self._factor = value

    @property
    def connected(self):
        return self._stream.running

    @property
    def sentiment(self):
        end = self._feels.end
        sentiments = self.sentiments(strt=self._latest_calc,
                                     end=end,
                                     delta_time=self._bin_size)
        for s in sentiments:
            pass
        return s

    def start(self, seconds=None, selfupdate=60):
        """
        Start listening to the stream.

        :param seconds: If you want to automatically disconnect after a certain
                        amount of time, pass the number of seconds into this
                        parameter.
        :param selfupdate: Number of seconds between auto-calculate.
        """
        def delayed_stop():
            time.sleep(seconds)
            print('Timer completed. Disconnecting now...')
            self.stop()

        def self_update():
            while self.connected:
                time.sleep(selfupdate)
                self.sentiment

        if len(self.tracking) == 0:
            print('Nothing to track!')
        else:
            self._stream.filter(track=self.tracking,
                                languages=self.lang,
                                async=True)
#  This does not work due to upstream bug in tweepy 3.5.0. They have fixed it in
#  https://github.com/tweepy/tweepy/pull/783
#            self._stream.filter(
#               track=self.tracking, languages=self.lang, async=True,
#               filter_level=self._filter_level
#               )
        if seconds is not None:
            t = Thread(target=delayed_stop)
            t.start()

        if selfupdate is not None and selfupdate > 0:
            t2 = Thread(target=self_update)
            t2.start()

    def stop(self):
        """
        Disconnect from the stream.

        Warning: Connecting and disconnecting too frequently will get you
        blacklisted by Twitter. Your connections should be long-lived.
        """
        self._stream.disconnect()

    def on_data(self, data):
        """
        Called by :class:`TweetListener` when new tweet data is recieved.

        Note: Due to upstream bug in tweepy for python3, it cannot handle the
        `filter_level` parameter in the `Stream.filter` function. Therefore,
        we'll take care of it here. The problem has been identified and fixed
        by the tweepy team here: https://github.com/tweepy/tweepy/pull/783

        :param data: The tweet data. Should be a single :class:`Tweet`.
        :type data: Tweet
        """
        filter_value = {'none': 0, 'low': 1, 'medium': 2}
        value = filter_value[data['filter_level']]

        if value >= filter_value[self._filter_level]:
            self._tweet_buffer.append(data)

            if len(self._tweet_buffer) > self.buffer_limit:
                t = Thread(target=self.clear_buffer)
                t.start()

    def clear_buffer(self):
        """
        Pops all the tweets currently in the buffer and puts them into the db.
        """
        while True:
            try:
                # The insert calculates sentiment values
                self._feels.insert_tweet(self._tweet_buffer.popleft())
            except IndexError:
                break

    def on_error(self, status):
        """
        Called by :class:`TweetListener` when an error is recieved.
        """
        self.start()

    def sentiments(self, strt=None, end=None, delta_time=None, nans=False):
        """
        Provides a generator for sentiment values in ``delta_time`` increments.

        :param start: The start time at which the generator yeilds a value. If
                      not provided, generator will start from beginning of your
                      dataset.
        :type start: datetime
        :param end: The ending datetime of the series. If not provided,
                    generator will not stop until it reaches the end of your
                    dataset.
        :type end: datetime
        :param delta_time: The time length that each sentiment value represents.
                           If not provided, the generator will use the setting
                           configured by :class:`TweetFeels`.
        :type delta_time: timedelta
        :param nans: Determines if a nan will be yielded when no tweets are
                     observed within a bin.
        :type nans: boolean
        """
        beginning = self._feels.start

        if strt is None:
            self._latest_calc = beginning
            strt = beginning
        else:
            self._latest_calc = max(strt, self._feels.start)
        if end is None:
            end = self._feels.end
        if delta_time is None:
            delta_time = self._bin_size

        # get to the starting point
        if strt < self._latest_calc:
            self._sentiment = Sentiment(0, 0, 0, 0)
            b = self._feels.tweets_between(beginning, strt)
        else:
            b = self._feels.tweets_between(self._latest_calc, strt)

        self._sentiment = self.model_sentiment(b, self._sentiment,
                                               self._factor)
        self._latest_calc = strt

        # start yielding sentiment values
        end = min(end, self._feels.end)
        if self._latest_calc < end:
            bins = self._feels.fetchbin(start=self._latest_calc,
                                        end=end,
                                        binsize=delta_time,
                                        empty=nans)
            sentiment = deque()
            for b in bins:
                try:
                    # only save sentiment value if not the last element
                    self._sentiment = sentiment.popleft()
                except IndexError:
                    pass

                latest = self._sentiment
                if len(b) > 0:
                    latest = self.model_sentiment(b, self._sentiment,
                                                  self._factor)
                sentiment.append(latest)
                self._latest_calc = b.start
                # Yield the latest element
                if len(b) == 0 and nans:
                    yield Sentiment(np.nan, b.influence, b.start, b.end)
                else:
                    yield sentiment[-1]
        else:
            # this only happens when strt >= end
            yield self._sentiment

    def model_sentiment(self, b, s, fo=0.99):
        """
        Compute the next real-time sentiment value from a bin of tweets.

        :param b: A ``TweetBin`` whose tweets update the running sentiment.
        :param s: The previous ``Sentiment`` value to decay from.
        :param fo: Fall-off factor; the weight kept from the previous value.
        """
        # Ignore tweets whose sentiment score is exactly 0.
        scored = b.df.loc[b.df.sentiment != 0]
        value = s.value
        if len(scored) > 0:
            # Weight each tweet's sentiment by the author's reach
            # (followers + friends); a zero total weight falls back to 0.
            try:
                avg = np.average(scored.sentiment,
                                 weights=(scored.followers_count +
                                          scored.friends_count))
            except ZeroDivisionError:
                avg = 0
            # Exponential decay of the old value toward the bin average.
            value = s.value * fo + avg * (1 - fo)
        return Sentiment(value, b.influence, b.start, b.end)
Exemple #20
0
                sentiment = "Negative"
            tweets_list.append(text_tweet)
            sents.append(sentiment)
            #Create new dataframe from the list of tweets and sentiment
            tweets_df = pd.DataFrame({
                'Tweets': tweets_list,
                'Sentiment': sents
            })
            tweets_df.to_csv(company + '.csv')
            #Plot pie chart for every 20 tweets
            if len(tweets_df) in np.arange(10, 1100, 20):
                print(tweets_df)
                plt.figure()
                dataframe_assign_colors(tweets_df)
                plt.show(block=False)
                plt.pause(3)
                plt.close('all')
            return True
        except BaseException as e:
            print(e)
            print(traceback.format_exc())

    def on_error(self, status):
        """Print the error status reported by the Twitter stream."""
        print(status)


# Authenticate with the Twitter API using the app credentials defined
# elsewhere in this file, then open a streaming connection filtered on
# the configured company keyword.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
# NOTE(review): tweet_mode='extended' is not a documented Stream() kwarg in
# classic tweepy — confirm it actually has an effect here.
twitterStream = Stream(auth, listener(), tweet_mode='extended')
twitterStream.filter(track=[company])
Exemple #21
0
            awsauth = AWS4Auth('', '', 'us-east-1', 'es')

            es = Elasticsearch(hosts=[{
                'host': host,
                'port': 443
            }],
                               http_auth=awsauth,
                               use_ssl=True,
                               verify_certs=True,
                               connection_class=RequestsHttpConnection)
            res = es.index(index="tweet-index",
                           doc_type='tweet',
                           id=temp["id"],
                           body=final)
            print(res['created'])
        return True


if __name__ == '__main__':
    # Build an authenticated tweepy API handle (credentials redacted here)
    # and stream tweets matching the football/politics keyword list below.
    auth = tweepy.OAuthHandler("", "")
    auth.set_access_token("", "")

    api = tweepy.API(auth)
    #track = ['obama', 'trump', 'manchester', 'pogba', 'clinton']
    # Start timestamp; presumably consumed by MyListener — TODO confirm.
    start_time = time.time()
    twitter_stream = Stream(auth, MyListener())
    twitter_stream.filter(track=[
        'pogba', 'trump', 'manchester', 'clinton', 'rashford', 'zlatan',
        'rooney', 'mourinho', 'messi', 'ronaldo'
    ])
 def stream_tweets(self, fetched_tweets_filename, hash_tags_list):
     """Open an authenticated Twitter stream filtered by the given terms.

     :param fetched_tweets_filename: path handed to TwitterListener
         (presumably where incoming tweets are written — TODO confirm).
     :param hash_tags_list: list of track terms for the streaming filter.
     """
     # This handles twitter authentication and the connection to the
     # Twitter Streaming API.
     listener = TwitterListener(fetched_tweets_filename)
     auth = self.twitter_authenticator.authenticate_twitter_app()
     stream = Stream(auth, listener)
     stream.filter(track=hash_tags_list)
Exemple #23
0
        return False
    return True


def retweeted(tweet):
    """Retweets would produce duplicates and use too much memory; these are not collected"""
    try:
        # Either the API flag or an "RT" marker in the first three
        # characters of the text identifies a retweet.
        if tweet["retweeted"]:
            return True
        if 'RT' in tweet["text"][0:3]:
            return True
    except KeyError:
        # Missing fields are treated as "not a retweet".
        pass
    return False


def english(tweet):
    """Return True when the tweet's language field contains "en"."""
    return "en" in tweet["lang"]


if __name__ == '__main__':

    # Authenticate and stream tweets about migration-related keywords to
    # the StdOutListener defined earlier in this file.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # NOTE(review): tweet_mode='extended' is not a documented Stream()
    # kwarg in classic tweepy — confirm it has an effect.
    stream = Stream(auth, listener, tweet_mode='extended')

    # Define keywords for filtering
    keywords = ['refugee', 'asylum seeker', 'immigrant', 'migrant', 'emigrant']
    stream.filter(track=keywords)
Exemple #24
0
        except BaseException as e:
            print("Error on_data: %s" % str(e))
        return True

    def on_error(self, status):
        """Print the stream error status; returning True keeps streaming."""
        print(status)
        return True


# note you must manually set the environment with your credentials

consumer_key = os.environ['CONSUMER_KEY']
consumer_secret = os.environ['CONSUMER_SECRET']
access_token = os.environ['ACCESS_TOKEN']
access_secret = os.environ['ACCESS_SECRET']

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# Collect tweets from a bounding box covering Canada for 4800 s (80 min),
# then disconnect. is_async=True runs the stream on a background thread,
# so the sleep below acts as the collection window.
canada_stream = Stream(auth, CanadaListener())
canada_stream.filter(locations=[-123.222463, 49.003687, -61.879918, 61.239175],
                     is_async=True)
time.sleep(4800)
canada_stream.disconnect()

# Same 80-minute collection window for a bounding box covering Spain.
spain_stream = Stream(auth, SpainListener())
spain_stream.filter(locations=[-7.412512, 35.785688, 3.321487, 42.376885],
                    is_async=True)
time.sleep(4800)
spain_stream.disconnect()
Exemple #25
0
            with open('data1.json', 'a') as output1:
                json.dump(data, output1)
            with open('data2.json', 'a') as output2:
                output2.write(data)
            with open('tweets.txt', 'a') as tweets:
                tweets.write(data)
                tweets.write('\n')
            output1.close()
            tweets.close()
            output2.close()
        except BaseException as e:
            print('File execution is stopped', str(e))
        return True

    def on_error(self, status):
        # Surface stream errors on stdout.
        print(status)


if __name__ == '__main__':
    # This code handles the twitter authentication and connections.

    # NOTE(review): `l` is an ambiguous single-letter name — consider
    # renaming to `listener`.
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret_key)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)

    # Here it will retrieve the twitter data based on this filter
    # (healthcare organisations and insurers).
    stream.filter(track=[
        'WHO', 'WebMD', 'Cigna', 'Aetna', 'Cerner', 'Highmark', 'Anthem',
        'AHA', 'Mayo Clinic'
    ])
Exemple #26
0
        return True

    def on_timeout(self):
        """Log stream timeouts; returning True keeps the stream running."""
        logger.info("Timeout")
        return True

    def on_disconnect(self, notice):
        """Log the disconnect notice sent by Twitter."""
        message = "Disconnect : " + str(notice)
        logger.info(message)
        return

    def on_warning(self, notice):
        """Log warning notices sent by Twitter."""
        message = "Warning : " + str(notice)
        logger.warning(message)
        return

if __name__ == "__main__":
    logger.info("Start")
    # Reconnect loop: restart the user stream on any failure until the
    # user interrupts with Ctrl-C.
    while True:
        try:
            # secure=True: connect to the streaming endpoint over TLS.
            stream = Stream(auth, Listener(), secure=True)
            stream.userstream()

        except KeyboardInterrupt:
            logger.info("Stop")
            break

        # NOTE(review): bare `except: pass` silently swallows every other
        # error and retries forever — log the exception at minimum.
        except:
            pass
        except:
            print("processing exception")

        return True

    # on failure
    def on_error(self, status):
        """Report the failure status reported by the stream."""
        print(status)


if __name__ == '__main__':

    listener = TweetStreamListener()

    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Reconnect loop: keep streaming until the user interrupts.
    while True:
        try:
            stream = Stream(auth, listener)
            stream.filter(track=track_keywords, languages=languages_option)
            # stream.filter(track = ['Jennifer Lopez'], languages = ['en'])
            # stream.filter(follow = follow_users_ids, languages = ['en'])

        # IncompleteRead is a transient HTTP hiccup — restart the stream.
        except IncompleteRead:
            pass
        except KeyboardInterrupt:
            stream.disconnect()
            break
    Return:
        String -- converted file name
    """
    return ''.join(convert_valid(one_char) for one_char in fname)


def convert_valid(one_char):
    """Convert a character into '_' if invalid.

    Arguments:
        one_char -- the char to convert
    Return:
        Character -- converted char
    """
    # Filenames may contain letters, digits, and "-", "_", ".".
    allowed = "-_.%s%s" % (string.ascii_letters, string.digits)
    return one_char if one_char in allowed else '_'

if __name__ == '__main__':
    # Parse CLI arguments (data dir, query, filter level) and start a
    # filtered English-language stream for the query.
    parser = get_parser()
    args = parser.parse_args()
    auth = OAuthHandler(config.consumer_key, config.consumer_secret)
    auth.set_access_token(config.access_token, config.access_secret)
    api = tweepy.API(auth)

    twitter_stream = Stream(auth, MyListener(args.data_dir, args.query, args.level))
    twitter_stream.filter(track=[args.query], languages=['en'], filter_level=args.level)

authentication.set_access_token(ACCESS_TOKEN, TOKEN_SECRET)

# Twitter Stream Listener / Kafka configuration
HASHTAG = "bigdata"  # hashtag to track on the Twitter stream
LOCALHOST = "localhost:9092"  # Kafka broker address to connect to
TOPIC = "twitter"  # Kafka topic the tweets are published to

# create Kafka Listener
class KafkaListener(StreamListener):
    """Stream listener that forwards every raw tweet payload to Kafka."""

    def on_data(self, data):
        """Publish the raw tweet JSON to the Kafka topic; True keeps streaming."""
        producer.send_messages(TOPIC, data.encode('utf-8'))
        print(data)
        return True

    def on_error(self, status):
        """Print the Twitter error status code."""
        print(status)

# Kafka client connected to the local broker.
client = KafkaClient(LOCALHOST)

# Simple (synchronous) producer used by the listener to publish tweets.
producer = SimpleProducer(client)

# Listener that forwards each tweet to Kafka.
listener = KafkaListener()

# Open the Twitter stream and track the configured hashtag.
# NOTE(review): tweepy's filter(track=...) expects a list of terms; a bare
# string may be iterated per-character — confirm track=[HASHTAG] is intended.
stream = Stream(authentication, listener)
stream.filter(track=HASHTAG)
Exemple #30
0
# Twitter app credentials.
# SECURITY(review): secrets are hard-coded in source — move them to
# environment variables or a config file, and rotate these keys.
access_token_secret = "zoOzBdLYdHG0pP30UZM1znSt5RAWAp7OYrSSeG2Xe3I8N"
consumer_key = "S3ZN012qzgK1IeR6yL5gnNFFX"
consumer_secret = "EfpDZrGtfv5Aw8y687hMoOYpv3TzFwHhVDot5FTkuuCHfOC7hb"


class FileWriteListener(StreamListener):
    """Stream listener that records tweets both in memory and to a file.

    NOTE(review): ``self.save_file`` is opened in ``__init__`` and never
    closed; callers are responsible for closing it when done.
    """

    def __init__(self):
        # Bug fix: the original called super(StreamListener, self).__init__(),
        # which looks up __init__ *past* StreamListener in the MRO and so
        # skips StreamListener's own initializer. Passing this class itself
        # runs the base-class initializer as intended.
        super(FileWriteListener, self).__init__()
        self.save_file = open('../data_California/tweets.json', 'w')
        self.tweets = []  # parsed tweet dicts accumulated in memory

    def on_data(self, tweet):
        """Store the parsed tweet and append the raw payload to the file."""
        self.tweets.append(json.loads(tweet))
        self.save_file.write(str(tweet))

    def on_error(self, status):
        """Print the error status; returning True keeps the stream alive."""
        print(status)
        return True


# Authenticate and start a filtered stream that records tweets via
# FileWriteListener.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = API(auth)

twitter_stream = Stream(auth, FileWriteListener())
# Here you can filter the stream by:
#    - keywords (as shown)
#    - users
twitter_stream.filter(track=['#California', '#california', '#LA'])