def activityHome(request):
    form = teamForm(request.POST or None)
    if request.method == 'POST':
        if form.is_valid():
            hashtag = request.POST.get('hashtag', '')
            return redirect('/tweetscan/home?hashtag=' + str(hashtag))
    else:
        hashtag_get = request.GET.get('hashtag')
        if hashtag_get is not None and hashtag_get != '':
            args = {'form': form, 'hashtag': hashtag_get}

            # File: sentiment_mod.py
            import nltk
            import random
            # from nltk.corpus import movie_reviews
            from nltk.classify.scikitlearn import SklearnClassifier
            import pickle
            from sklearn.naive_bayes import MultinomialNB, BernoulliNB
            from sklearn.linear_model import LogisticRegression, SGDClassifier
            from sklearn.svm import SVC, LinearSVC, NuSVC
            from nltk.classify import ClassifierI
            from statistics import mode
            from nltk.tokenize import word_tokenize

            class VoteClassifier(ClassifierI):
                """Majority-vote ensemble over several trained classifiers."""

                def __init__(self, *classifiers):
                    self._classifiers = classifiers

                def classify(self, features):
                    votes = []
                    for c in self._classifiers:
                        v = c.classify(features)
                        votes.append(v)
                    return mode(votes)

                def confidence(self, features):
                    # Fraction of classifiers that agree with the majority label.
                    votes = []
                    for c in self._classifiers:
                        v = c.classify(features)
                        votes.append(v)
                    choice_votes = votes.count(mode(votes))
                    conf = choice_votes / len(votes)
                    return conf

            documents_f = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/documents.pickle", "rb")
            documents = pickle.load(documents_f)
            documents_f.close()

            word_features5k_f = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/word_features5k.pickle", "rb")
            word_features = pickle.load(word_features5k_f)
            word_features5k_f.close()

            def find_features(document):
                # Boolean bag-of-words over the pickled 5k-word vocabulary.
                words = word_tokenize(document)
                print(words)
                features = {}
                for w in word_features:
                    features[w] = (w in words)
                return features

            featuresets_f = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/featuresets.pickle", "rb")
            featuresets = pickle.load(featuresets_f)
            featuresets_f.close()

            random.shuffle(featuresets)
            print(len(featuresets))
            testing_set = featuresets[10000:]
            training_set = featuresets[:10000]

            open_file = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/originalnaivebayes5k.pickle", "rb")
            classifier = pickle.load(open_file)
            open_file.close()

            open_file = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/MNB_classifier5k.pickle", "rb")
            MNB_classifier = pickle.load(open_file)
            open_file.close()

            open_file = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/BernoulliNB_classifier5k.pickle", "rb")
            BernoulliNB_classifier = pickle.load(open_file)
            open_file.close()

            open_file = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/LogisticRegression_classifier5k.pickle", "rb")
            LogisticRegression_classifier = pickle.load(open_file)
            open_file.close()

            open_file = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/pickled_algos/SGDC_classifier5k.pickle", "rb")
            SGDC_classifier = pickle.load(open_file)
            open_file.close()

            # open_file = open("/pickled_algos/LinearSVC_classifier5k.pickle", "rb")
            # LinearSVC_classifier = pickle.load(open_file)
            # open_file.close()

            voted_classifier = VoteClassifier(
                classifier,
                MNB_classifier,
                BernoulliNB_classifier,
                LogisticRegression_classifier)

            def sentiment(text):
                print("inside sentiment")
                feats = find_features(text)
                return voted_classifier.classify(feats), voted_classifier.confidence(feats)

            print("After sentiment function loc")

            # Streaming tweets
            from tweepy import Stream
            from tweepy import OAuthHandler
            from tweepy.streaming import StreamListener
            import json

            # consumer key, consumer secret, access token, access secret.
            ckey = "lookB9U9DovzE29uvPBm9OV03"
            csecret = "WJT4BPbyvWdEba3TfhRTBZZAw8JgHnj9bJGre4XOHvm0BOFs6o"
            atoken = "3255426194-tv415MxWQSZlB4kxq4SQBQXhqNMnF54kwmxTfy5"
            asecret = "MPHTzP2APFeu1o3mYMCLzt5EQFDo1oDNhqeoe5rEUqiRC"

            class listener(StreamListener):
                def __init__(self):
                    super().__init__()
                    self.counter = 0
                    self.limit = 5

                def on_data(self, data):
                    all_data = json.loads(data)
                    tweet = all_data["text"]
                    sentiment_value, confidence = sentiment(tweet)  # sentiment of the tweet
                    print(tweet, sentiment_value, confidence, self.counter)
                    self.counter += 1
                    # Only persist high-confidence classifications.
                    if confidence * 100 >= 80:
                        output = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/outputs/twitter-text-out.txt", "a")
                        output.write(tweet)
                        output.write('\n')
                        output.close()
                        output = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/outputs/twitter-out.txt", "a")
                        output.write(sentiment_value)
                        output.write('\n')
                        output.close()
                    if self.counter < self.limit:
                        return True
                    else:
                        twitterStream.disconnect()
                        return True

                def on_error(self, status):
                    print(status)

            print("before twitterStream")
            # Truncate previous outputs before streaming.
            open('C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/outputs/twitter-out.txt', 'w').close()
            open('C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/outputs/twitter-text-out.txt', 'w').close()

            auth = OAuthHandler(ckey, csecret)
            auth.set_access_token(atoken, asecret)
            print("after auth")

            twitterStream = Stream(auth, listener())
            twitterStream.filter(track=[hashtag_get], languages=["en"])

            # Plotting graphs of the sentiments
            import matplotlib.pyplot as plt
            import matplotlib.animation as animation
            from matplotlib import style
            import time

            plt.rcdefaults()
            style.use("ggplot")

            fig = plt.figure()
            ax1 = fig.add_subplot(1, 1, 1)

            def animate(i):
                # Running sum: +1 per "pos" line, -1 per "neg" line.
                pullData = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/outputs/twitter-out.txt", "r").read()
                lines = pullData.split('\n')
                xar = []
                yar = []
                x = 0
                y = 0
                for l in lines[:]:
                    x += 1
                    if "pos" in l:
                        y += 1
                    elif "neg" in l:
                        y -= 1
                    xar.append(x)
                    yar.append(y)
                ax1.clear()
                ax1.plot(xar, yar)

            ani = animation.FuncAnimation(fig, animate, interval=1000)
            # plt.show()
            plt.savefig("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/static/wordcloud.jpg")

            # Word cloud
            from wordcloud import WordCloud
            tweet = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/outputs/twitter-text-out.txt", "r")
            read_tweet = tweet.read()
            wordcloud = WordCloud(width=800, height=500, max_font_size=100, random_state=21).generate(read_tweet)
            plt.figure(figsize=(10, 7))
            plt.imshow(wordcloud, interpolation="bilinear")
            plt.axis('off')
            plt.title('WordCloud')
            plt.tight_layout()
            # plt.show()
            plt.savefig("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/static/wordcloud.jpg")

            # Pie chart
            pullData = open("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/outputs/twitter-out.txt", "r").read()
            lines = pullData.split('\n')
            pos, neg = 0, 0
            for l in lines:
                if l == 'pos':
                    pos += 1
                elif l == 'neg':
                    neg += 1
            labels = ['positive', 'negative']
            sizes = [pos, neg]
            colors = ['#ff9999', '#66b3ff']
            explode = [0.1, 0]
            fig1, ax1 = plt.subplots()
            ax1.pie(sizes, explode=explode, labels=labels, colors=colors,
                    autopct='%1.1f%%', shadow=True, startangle=90)
            ax1.axis('equal')
            plt.title('Sentiment Pie Chart')
            plt.tight_layout()
            # plt.show()
            plt.savefig("C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan/static/pie.jpg")
        else:
            args = {'form': form}
        return render(request, 'home.html', args)
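# A minimal, self-contained sketch of the voting logic used by
# VoteClassifier above. The stub classifiers below are illustrative
# stand-ins for the pickled NLTK/scikit-learn models; they are not part
# of the original project.
from statistics import mode

class StubClassifier:
    """Always returns the same label, regardless of features."""
    def __init__(self, label):
        self.label = label

    def classify(self, features):
        return self.label

stubs = [StubClassifier("pos"), StubClassifier("pos"), StubClassifier("neg")]
votes = [c.classify({}) for c in stubs]
print(mode(votes))                            # "pos" -- the majority label
print(votes.count(mode(votes)) / len(votes))  # 0.666... -- the confidence score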
            return True
        except BaseException as e:
            print('failed ondata,', str(e))
            time.sleep(5)
            pass

        # Flush the buffered tweets to disk as a JSON array.
        saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
        saveFile.write(u'[\n')
        saveFile.write(','.join(self.tweet_data))
        saveFile.write(u'\n]')
        saveFile.close()
        exit()

    def on_error(self, status):
        print(status)
        saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
        saveFile.write(u'[\n')
        saveFile.write(','.join(self.tweet_data))
        saveFile.write(u'\n]')
        saveFile.close()

auth = OAuthHandler(ckey, consumer_secret)  # OAuth object
auth.set_access_token(access_token_key, access_token_secret)
# initialize Stream object with a time-out limit
twitterStream = Stream(auth, listener(start_time, time_limit=20))
# call the filter method to run the Stream object
twitterStream.filter(track=keyword_list, languages=['en'])
        super(listener, self).__init__()
        # initialize number of tweets to be obtained
        self.counter = 0
        self.limit = 50000

    def on_data(self, data):
        # open the file in append mode and write the raw JSON to it
        print(data)
        try:
            with open('cars.json', 'a') as f:
                f.write(data)
        except BaseException as e:
            print('danger')
        # check if the tweet count has been reached
        if self.counter < self.limit:
            self.counter = self.counter + 1
            return True
        else:
            return False

    def on_error(self, status):
        print(status)

# authentication
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
twitterStream = Stream(auth, listener())
# set the keyword and start the listener to download tweets
twitterStream.filter(track=['cars'])
def sendData(c_socket):
    # authenticate and forward matching tweets to the client socket
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    twitter_stream = Stream(auth, TweetsListener(c_socket))
    twitter_stream.filter(track=['soccer'])
def stream_tweets(self, tweetsFilename, hash_tag_list):
    listener = LiveTweetStreaming(tweetsFilename)
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    stream = Stream(auth, listener)
    stream.filter(languages=['en'], track=hash_tag_list)
            tweet_text = tweet_text.replace('\n', ' ').replace("\t", " ")
            if "RT" not in tweet_text:
                # append the tweet to today's tab-separated daily file
                with codecs.open("tweets-daily/" + datetime.now().strftime("%Y-%m-%d") + '.txt',
                                 'a', encoding="utf-8") as f:
                    self.counter += 1
                    str_out = json_data["id_str"] + "\t"
                    tags = ""
                    for hashtag in json_data["entities"]["hashtags"]:
                        tags = tags + "," + hashtag["text"]
                    if tags != "":
                        tags = tags[1:]
                    str_out = str_out + "\t" + tags + "\t" + tweet_text + "\r\n"
                    f.write(str_out)
                    print(str(self.counter) + " : \t" + str_out)
            return True
        except BaseException as e:
            print("Error on_data: %s" % str(e))
            return True

    def on_error(self, status):
        print(status)
        return True

# track a few very common Persian words to sample the Persian-language stream
twitter_stream = Stream(auth, TweetListener())
twitter_stream.filter(languages=['fa'], track=['با', 'از', 'به', 'در'])
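# Sketch: reading back one line of the daily file written above. The format
# is id_str, an empty field, comma-separated hashtags, then the tweet text;
# the sample line here is made up for illustration.
line = "1234567890\t\tnews,sport\tsome tweet text\r\n"
tweet_id, _, tags, text = line.rstrip("\r\n").split("\t")
hashtags = tags.split(",") if tags else []
print(tweet_id, hashtags, text)  # 1234567890 ['news', 'sport'] some tweet text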
def main():
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    twitter_stream = Stream(auth, MyListener())
    # taking hashtag list from GameCategory.py
    twitter_stream.filter(track=all_hashtags)
# check for S3 configuration, enable if present
s3_bucketname = config['io'].get('s3_bucketname', None)
if s3_bucketname:
    twitter_listener = TwitterListener(outfile, target_count, s3_bucketname)
else:
    print("[!] No S3 bucket name found, running in local archive mode.")
    twitter_listener = TwitterListener(outfile, target_count)

auth = OAuthHandler(config['twitter'].get('consumer_key'),
                    config['twitter'].get('consumer_secret'))
auth.set_access_token(config['twitter'].get('access_token'),
                      config['twitter'].get('access_token_secret'))
stream = Stream(auth, twitter_listener)

backoff_in_seconds = 1
while backoff_in_seconds < 65:
    try:
        print(" * Tracker String: %s" % tracker_string)
        stream.filter(track=[tracker_string])
    except KeyboardInterrupt:
        print("Shutting down listener...")
        twitter_listener.close_file()
        print("Clean shutdown successful!")
        exit(0)
    except (UrlLibProtocolError, RequestsConnectionError):
        print("Connection reset by host, retrying in %d seconds."
              % twitter_listener.backoff_in_seconds)
        time.sleep(twitter_listener.backoff_in_seconds)
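# Note: as written, the local backoff_in_seconds never grows, and the sleep
# uses a separate attribute on the listener. A common exponential-backoff
# pattern looks like the self-contained sketch below; connect() is a
# stand-in for stream.filter(...) failing, not part of the original code.
import time

def connect():
    raise ConnectionError  # stand-in for a dropped streaming connection

backoff = 1
while backoff < 65:
    try:
        connect()
        backoff = 1  # reset after a clean run
    except (ConnectionError, OSError):
        time.sleep(backoff)
        backoff = min(backoff * 2, 64)  # 1, 2, 4, ... capped at 64 seconds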
# -*- coding: utf-8 -*-
"""
best way to install tweepy: python3 -m pip install tweepy
Author: Mozzie yao
"""
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
import re

# Vars storing user IDs to access the Twitter API
access_token = '3004457773-mcwtl44ExjnrYQ2mEDdnZQ4ukqRqIvfH7986wqc'
access_token_secret = 'KNAkgxRLaxa8IP0wSIZfGF3cEkhVtpig8L6vzgf2ZOX3g'
consumer_key = 'x7ACjFwB00CSOD71JIRM3b4xV'
consumer_secret = 'mBeHhpaqhbWMpKpfQjlbPoX1ImCX7GGZ7eGqU9advQYMHkJNfp'

# Query item
tracklist = []
tweet_count = 0
n_tweets = 1000

'''
this part is used for testing print display only

class StdOutListener(StreamListener):
    def on_data(self, data):
        global tweet_count
        global n_tweets
        global stream
self.count_Android) out += "\n" f.write(out) print out return True except BaseException as e: print("Error on_data: %s" % str(e)) time.sleep(5) return True def on_error(self, status): print(status) return True # Twitter dev credentials consumer_key = 'your_consumer_key' consumer_secret = 'your_consumer_secret' access_token = 'your-access_token' access_secret = 'access_secret' if __name__ == '__main__': auth = OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_secret) api = tweepy.API(auth) print "-Start: " + str(datetime.datetime.now().time()) search = u"#AppleTax" twitter_stream = Stream(auth, MyListener(search)) twitter_stream.filter(track=[search])
        self.__rate_limits_per_user[screenName] = existing_rate + 1
        # check the rate limit, and reply accordingly
        if existing_rate == self.__max_per_user:
            replyText = '@' + screenName + ' lo siento, pero solo 2 respuestas por hora 🤖👮'
            twitterApi.update_status(status=replyText, in_reply_to_status_id=tweetId)
            print("replied to this user, saying he/she should wait for an hour.")
            return
        elif existing_rate > self.__max_per_user:
            print("*already* replied to this user, he/she should wait for an hour, ignoring.")
            return

        replyText = '@' + screenName + " " + self.response("")
        twitterApi.update_status(status=replyText, in_reply_to_status_id=tweetId)
        return

    def on_error(self, status):
        print(status)

if __name__ == '__main__':
    streamListener = ReplyToTweet()
    twitterStream = Stream(auth, streamListener)
    twitterStream.filter(track=['@' + account_screen_name])
        except AttributeError as e:
            print('AttributeError was returned, stupid bug')
            print(e)
        except tweepy.TweepError as e:
            print('Below is the printed exception')
            print(e)
            if '401' in str(e):  # not sure if this will even work
                print('Below is the response that came in')
                print(e)
                time.sleep(60)
            else:
                # back off if another status code was returned; we don't like other kinds
                time.sleep(60)
        except BaseException as e:
            print('failed ondata,', str(e))
            time.sleep(5)
            exit()

    def on_error(self, status):
        print(status)

# Instance
auth = OAuthHandler(ckey, consumer_secret)  # consumer keys
auth.set_access_token(access_token_key, access_token_secret)  # secret keys

# initialize Stream object with a time-out limit
twitterStream = Stream(auth, listener(start_time, time_limit=82800))
# bounding-box filter for Washington
twitterStream.filter(locations=[-124.84, 45.54, -116.92, 49.0])
def start_stream(self):
    self.twitterStream = Stream(auth, listener(self.keyword))
    self.twitterStream.filter(track=[self.keyword], languages=['en'], is_async=True)
def stream_tweets(self, filters):
    listener = TwitterListener()
    auth = self.twitter_authenticator.authenticate_apps()
    stream = Stream(auth, listener)
    stream.filter(locations=filters)
print("Tweets collected! Creating points, please wait... ") return False except KeyError: pass return True except BaseException as e: print("Error on_data: {}".format(e)) return True def on_error(self, status): print(status) return True # Initiate stream of tweets using input as search parameters twitter_stream = Stream( auth, MyListener(num_tweets_to_grab=how_many, output_file=output)) twitter_stream.filter(track=[search_key]) # Convert and save JSON file with tweets to a geoJSON format to be converted into a shapefile with open(output, "r", newline="\r\n") as f: geo_data = {"type": "FeatureCollection", "features": []} for line in f: tweet = json.loads(line) if tweet["coordinates"]: geo_json_feature = { "type": "Feature", "geometry": tweet["coordinates"], "properties": { "text": tweet["text"], "created_at": tweet["created_at"] }
        else:
            negative = negative + sen.sentiment.polarity
        compound = compound + senti
        print(count)
        print(tweet.strip())
        print(senti)
        print(t)
        print(str(positive) + ' ' + str(negative) + ' ' + str(compound))

        # live-update the sentiment scatter plot
        plt.axis([0, 70, -20, 20])
        plt.xlabel('Time')
        plt.ylabel('Sentiment')
        plt.plot([t], [positive], 'go', [t], [negative], 'ro', [t], [compound], 'bo')
        plt.show()
        plt.pause(0.0001)

        if count == 200:
            return False
        else:
            return True

    def on_error(self, status):
        print(status)

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener(count))
twitterStream.filter(track=["obama"])
def bot(request, name): print("Hi") if request.method == 'POST': global sign reply = request.POST.get('reply', '') print(reply) global list_of_tweets global i global pos_count flag = True twitterStream = Stream(auth, listener()) twitterStream.filter(track=[reply]) print(list_of_tweets) block = [] for j in range(len(list_of_tweets)): url = "https://api.twitter.com/1.1/statuses/oembed.json" params = dict(id=list_of_tweets[j]) resp = requests.get(url=url, params=params) data = resp.json() try: aayu = data["html"].replace( "<script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>", "") block.append(aayu) except Exception as e: pass if pos_count > 5: sentiment = "The general sentiment of people is positive" elif pos_count > 3: sentiment = "The general sentiment of people is nuetral" else: sentiment = "The general sentiment of people is negative" list_of_tweets = [] i = 0 pos_count = 0 return render(request, 'stock/bot.html', { 'reply': reply, 'flag': flag, 'block': block, 'sentiment': sentiment }) else: global sign reply = name print(reply) flag = True twitterStream = Stream(auth, listener()) twitterStream.filter(track=[reply]) print(list_of_tweets) block = [] for j in range(len(list_of_tweets)): url = "https://api.twitter.com/1.1/statuses/oembed.json" params = dict(id=list_of_tweets[j]) resp = requests.get(url=url, params=params) data = resp.json() try: aayu = data["html"].replace( "<script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>", "") block.append(aayu) except Exception as e: pass if pos_count > 5: sentiment = "The general sentiment of people is positive" elif pos_count > 3: sentiment = "The general sentiment of people is nuetral" else: sentiment = "The general sentiment of people is negative" list_of_tweets = [] i = 0 pos_count = 0 return render(request, 'stock/bot.html', { 'reply': reply, 'flag': flag, 'block': block, 'sentiment': sentiment })
        except KeyError:
            # traceback.print_exc()
            if 'limit' in data:
                # Twitter sent a rate-limit notice instead of a tweet.
                time.sleep(1)
                self.tweets_gone += 1
                if self.tweets_gone % 1000 == 0:
                    print('{} tweets filtered'.format(self.tweets_gone))
            else:
                self.outFile.close()
                return False

    def on_error(self, status):
        print(status)

if __name__ == '__main__':
    auth = OAuthHandler(consumerKey, consumerSecret)
    auth.set_access_token(accessToken, secretAccessToken)
    print('set auth and access')
    stream = Stream(
        auth,
        MyListener(time_limit=15 * 60,
                   file_name='data/preprocessed_michigan_tweets.json'))
    keywords = ['corona', 'quarantine', 'covid']
    stream.filter(track=keywords)
    print('Finished')
class TweetFeels(object):
    """
    The controller.

    :param credentials: A list of your 4 credential components.
    :param tracking: A list of keywords to track.
    :param db: A sqlite database to store data. Will be created if it doesn't
               already exist. Will append if it exists.
    :ivar lang: A list of languages to include in tweet gathering.
    :ivar buffer_limit: When the number of tweets in the buffer hits this
                        limit all tweets in the buffer gets flushed to the
                        database.
    :ivar connected: Tells you if TweetFeels is connected and listening to
                     Twitter.
    :ivar sentiment: The real-time sentiment score.
    :ivar binsize: The fixed observation interval between new sentiment
                   calculations. (default = 60 seconds)
    :ivar factor: The fall-off factor used in real-time sentiment
                  calculation. (default = 0.99)
    """
    _db_factory = (lambda db: TweetData(db))
    _listener_factory = (lambda ctrl: TweetListener(ctrl))
    _stream_factory = (lambda auth, listener: Stream(auth, listener))

    def __init__(self, credentials, tracking=[], db='feels.sqlite'):
        self._feels = TweetFeels._db_factory(db)
        _auth = OAuthHandler(credentials[0], credentials[1])
        _auth.set_access_token(credentials[2], credentials[3])
        self._listener = TweetFeels._listener_factory(self)
        self._stream = TweetFeels._stream_factory(_auth, self._listener)
        self.tracking = tracking
        self.lang = ['en']
        self._sentiment = Sentiment(0, 0, 0, 0)
        self._filter_level = 'low'
        self._bin_size = timedelta(seconds=60)
        self._latest_calc = self._feels.start
        self._tweet_buffer = deque()
        self.buffer_limit = 50
        self._factor = 0.99

    @property
    def binsize(self):
        return self._bin_size

    @binsize.setter
    def binsize(self, value):
        assert isinstance(value, timedelta)
        if value != self._bin_size:
            self._latest_calc = self._feels.start
            self._bin_size = value

    @property
    def factor(self):
        return self._factor

    @factor.setter
    def factor(self, value):
        assert (value <= 1 and value > 0)
        self._latest_calc = self._feels.start
        self._factor = value

    @property
    def connected(self):
        return self._stream.running

    @property
    def sentiment(self):
        # Exhaust the generator; the last yielded value is the current score.
        end = self._feels.end
        sentiments = self.sentiments(strt=self._latest_calc,
                                     end=end,
                                     delta_time=self._bin_size)
        for s in sentiments:
            pass
        return s

    def start(self, seconds=None, selfupdate=60):
        """
        Start listening to the stream.

        :param seconds: If you want to automatically disconnect after a
                        certain amount of time, pass the number of seconds
                        into this parameter.
        :param selfupdate: Number of seconds between auto-calculate.
        """
        def delayed_stop():
            time.sleep(seconds)
            print('Timer completed. Disconnecting now...')
            self.stop()

        def self_update():
            while self.connected:
                time.sleep(selfupdate)
                self.sentiment

        if len(self.tracking) == 0:
            print('Nothing to track!')
        else:
            self._stream.filter(track=self.tracking, languages=self.lang,
                                async=True)
            # This does not work due to an upstream bug in tweepy 3.5.0.
            # They have fixed it in https://github.com/tweepy/tweepy/pull/783
            # self._stream.filter(
            #     track=self.tracking, languages=self.lang, async=True,
            #     filter_level=self._filter_level
            # )
        if seconds is not None:
            t = Thread(target=delayed_stop)
            t.start()
        if selfupdate is not None and selfupdate > 0:
            t2 = Thread(target=self_update)
            t2.start()

    def stop(self):
        """
        Disconnect from the stream.

        Warning: Connecting and disconnecting too frequently will get you
        blacklisted by Twitter. Your connections should be long-lived.
        """
        self._stream.disconnect()

    def on_data(self, data):
        """
        Called by :class:`TweetListener` when new tweet data is received.
        Note: Due to an upstream bug in tweepy for python3, it cannot handle
        the `filter_level` parameter in the `Stream.filter` function.
        Therefore, we'll take care of it here. The problem has been
        identified and fixed by the tweepy team here:
        https://github.com/tweepy/tweepy/pull/783

        :param data: The tweet data. Should be a single :class:`Tweet`.
        :type data: Tweet
        """
        filter_value = {'none': 0, 'low': 1, 'medium': 2}
        value = filter_value[data['filter_level']]
        if value >= filter_value[self._filter_level]:
            self._tweet_buffer.append(data)
            if len(self._tweet_buffer) > self.buffer_limit:
                t = Thread(target=self.clear_buffer)
                t.start()

    def clear_buffer(self):
        """
        Pops all the tweets currently in the buffer and puts them into the db.
        """
        while True:
            try:
                # The insert calculates sentiment values
                self._feels.insert_tweet(self._tweet_buffer.popleft())
            except IndexError:
                break

    def on_error(self, status):
        """
        Called by :class:`TweetListener` when an error is received.
        """
        self.start()

    def sentiments(self, strt=None, end=None, delta_time=None, nans=False):
        """
        Provides a generator for sentiment values in ``delta_time``
        increments.

        :param strt: The start time at which the generator yields a value.
                     If not provided, the generator will start from the
                     beginning of your dataset.
        :type strt: datetime
        :param end: The ending datetime of the series. If not provided, the
                    generator will not stop until it reaches the end of your
                    dataset.
        :type end: datetime
        :param delta_time: The time length that each sentiment value
                           represents. If not provided, the generator will
                           use the setting configured by :class:`TweetFeels`.
        :type delta_time: timedelta
        :param nans: Determines if a nan will be yielded when no tweets are
                     observed within a bin.
        :type nans: boolean
        """
        beginning = self._feels.start
        if strt is None:
            self._latest_calc = beginning
            strt = beginning
        else:
            self._latest_calc = max(strt, self._feels.start)
        if end is None:
            end = self._feels.end
        if delta_time is None:
            delta_time = self._bin_size

        # get to the starting point
        if strt < self._latest_calc:
            self._sentiment = Sentiment(0, 0, 0, 0)
            b = self._feels.tweets_between(beginning, strt)
        else:
            b = self._feels.tweets_between(self._latest_calc, strt)
        self._sentiment = self.model_sentiment(b, self._sentiment,
                                               self._factor)
        self._latest_calc = strt

        # start yielding sentiment values
        end = min(end, self._feels.end)
        if self._latest_calc < end:
            bins = self._feels.fetchbin(start=self._latest_calc, end=end,
                                        binsize=delta_time, empty=nans)
            sentiment = deque()
            for b in bins:
                try:
                    # only save sentiment value if not the last element
                    self._sentiment = sentiment.popleft()
                except IndexError:
                    pass
                latest = self._sentiment
                if len(b) > 0:
                    latest = self.model_sentiment(b, self._sentiment,
                                                  self._factor)
                sentiment.append(latest)
                self._latest_calc = b.start
                # yield the latest element
                if len(b) == 0 and nans:
                    yield Sentiment(np.nan, b.influence, b.start, b.end)
                else:
                    yield sentiment[-1]
        else:
            # this only happens when strt >= end
            yield self._sentiment

    def model_sentiment(self, b, s, fo=0.99):
        """
        Defines the real-time sentiment model given a dataframe of tweets.

        :param b: A ``TweetBin`` to calculate the new sentiment value.
        :param s: The initial Sentiment to begin calculation.
        :param fo: Fall-off factor
        """
        df = b.df.loc[b.df.sentiment != 0]  # drop rows having 0 sentiment
        newval = s.value
        if len(df) > 0:
            try:
                val = np.average(df.sentiment,
                                 weights=df.followers_count + df.friends_count)
            except ZeroDivisionError:
                val = 0
            newval = s.value * fo + val * (1 - fo)
        return Sentiment(newval, b.influence, b.start, b.end)
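# A minimal usage sketch based on the docstrings above. The credential
# strings are placeholders, and the keyword/duration values are arbitrary
# examples, not part of the original code.
creds = ['consumer_key', 'consumer_secret', 'access_token', 'access_secret']
feels = TweetFeels(creds, tracking=['python'], db='feels.sqlite')
feels.buffer_limit = 100  # flush to sqlite every 100 buffered tweets
feels.start(seconds=300)  # listen for five minutes, then disconnect
# ...once data has accumulated, the property walks the bins and returns the
# latest fall-off-weighted score (assuming Sentiment exposes a value field,
# as model_sentiment's use of s.value suggests):
print(feels.sentiment.value)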
sentiment = "Negative" tweets_list.append(text_tweet) sents.append(sentiment) #Create new dataframe from the list of tweets and sentiment tweets_df = pd.DataFrame({ 'Tweets': tweets_list, 'Sentiment': sents }) tweets_df.to_csv(company + '.csv') #Plot pie chart for every 20 tweets if len(tweets_df) in np.arange(10, 1100, 20): print(tweets_df) plt.figure() dataframe_assign_colors(tweets_df) plt.show(block=False) plt.pause(3) plt.close('all') return True except BaseException as e: print(e) print(traceback.format_exc()) def on_error(self, status): print(status) auth = OAuthHandler(ckey, csecret) auth.set_access_token(atoken, asecret) twitterStream = Stream(auth, listener(), tweet_mode='extended') twitterStream.filter(track=[company])
        awsauth = AWS4Auth('', '', 'us-east-1', 'es')
        es = Elasticsearch(hosts=[{'host': host, 'port': 443}],
                           http_auth=awsauth,
                           use_ssl=True,
                           verify_certs=True,
                           connection_class=RequestsHttpConnection)
        # index the processed tweet document by its tweet id
        res = es.index(index="tweet-index", doc_type='tweet',
                       id=temp["id"], body=final)
        print(res['created'])
        return True

if __name__ == '__main__':
    auth = tweepy.OAuthHandler("", "")
    auth.set_access_token("", "")
    api = tweepy.API(auth)
    # track = ['obama', 'trump', 'manchester', 'pogba', 'clinton']
    start_time = time.time()
    twitter_stream = Stream(auth, MyListener())
    twitter_stream.filter(track=[
        'pogba', 'trump', 'manchester', 'clinton', 'rashford', 'zlatan',
        'rooney', 'mourinho', 'messi', 'ronaldo'
    ])
def stream_tweets(self, fetched_tweets_filename, hash_tags_list):
    # This handles Twitter authentication and the connection to the
    # Twitter Streaming API
    listener = TwitterListener(fetched_tweets_filename)
    auth = self.twitter_authenticator.authenticate_twitter_app()
    stream = Stream(auth, listener)
    stream.filter(track=hash_tags_list)
        return False
    return True

def retweeted(tweet):
    """Retweets would produce duplicates and use too much memory;
    these are not collected"""
    try:
        if tweet["retweeted"] or 'RT' in tweet["text"][0:3]:
            return True
    except KeyError:
        pass
    return False

def english(tweet):
    if "en" in tweet["lang"]:
        return True
    return False

if __name__ == '__main__':
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener, tweet_mode='extended')
    # Define keywords for filtering
    keywords = ['refugee', 'asylum seeker', 'immigrant', 'migrant', 'emigrant']
    stream.filter(track=keywords)
except BaseException as e: print("Error on_data: %s" % str(e)) return True def on_error(self, status): print(status) return True # note you must manually set the environment with your credentials consumer_key = os.environ['CONSUMER_KEY'] consumer_secret = os.environ['CONSUMER_SECRET'] access_token = os.environ['ACCESS_TOKEN'] access_secret = os.environ['ACCESS_SECRET'] auth = OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_secret) canada_stream = Stream(auth, CanadaListener()) canada_stream.filter(locations=[-123.222463, 49.003687, -61.879918, 61.239175], is_async=True) time.sleep(4800) canada_stream.disconnect() spain_stream = Stream(auth, SpainListener()) spain_stream.filter(locations=[-7.412512, 35.785688, 3.321487, 42.376885], is_async=True) time.sleep(4800) spain_stream.disconnect()
            # the with blocks close each file automatically
            with open('data1.json', 'a') as output1:
                json.dump(data, output1)
            with open('data2.json', 'a') as output2:
                output2.write(data)
            with open('tweets.txt', 'a') as tweets:
                tweets.write(data)
                tweets.write('\n')
        except BaseException as e:
            print('File execution is stopped', str(e))
        return True

    def on_error(self, status):
        print(status)

if __name__ == '__main__':
    # This code handles the twitter authentication and connections.
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret_key)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
    # Here it will retrieve the twitter data based on this filter.
    stream.filter(track=[
        'WHO', 'WebMD', 'Cigna', 'Aetna', 'Cerner', 'Highmark', 'Anthem',
        'AHA', 'Mayo Clinic'
    ])
        return True

    def on_timeout(self):
        logger.info("Timeout")
        return True

    def on_disconnect(self, notice):
        logger.info("Disconnect : " + str(notice))
        return

    def on_warning(self, notice):
        logger.warning("Warning : " + str(notice))
        return

if __name__ == "__main__":
    logger.info("Start")
    while True:
        try:
            stream = Stream(auth, Listener(), secure=True)
            stream.userstream()
        except KeyboardInterrupt:
            logger.info("Stop")
            break
        except:
            pass
except: print("processing exception") return True # on failure def on_error(self, status): print(status) if __name__ == '__main__': listener = TweetStreamListener() auth = OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) while True: try: stream = Stream(auth, listener) stream.filter(track=track_keywords, languages=languages_option) # stream.filter(track = ['Jennifer Lopez'], languages = ['en']) # stream.filter(follow = follow_users_ids, languages = ['en']) except IncompleteRead: pass except KeyboardInterrupt: stream.disconnect() break
    Return:
        String -- converted file name
    """
    return ''.join(convert_valid(one_char) for one_char in fname)

def convert_valid(one_char):
    """Convert a character into '_' if invalid.

    Arguments:
        one_char -- the char to convert

    Return:
        Character -- converted char
    """
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    if one_char in valid_chars:
        return one_char
    else:
        return '_'

if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()
    auth = OAuthHandler(config.consumer_key, config.consumer_secret)
    auth.set_access_token(config.access_token, config.access_secret)
    api = tweepy.API(auth)
    twitter_stream = Stream(auth, MyListener(args.data_dir, args.query,
                                             args.level))
    twitter_stream.filter(track=[args.query], languages=['en'],
                          filter_level=args.level)
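# Quick check of the sanitizer defined above, with a made-up query string:
# every character outside [-_.a-zA-Z0-9] is replaced by '_'.
print(''.join(convert_valid(c) for c in '#BigData rocks!'))
# -> _BigData_rocks_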
authentication.set_access_token(ACCESS_TOKEN, TOKEN_SECRET)

# Twitter Stream Listener
HASHTAG = "bigdata"           # track hashtag
LOCALHOST = "localhost:9092"  # connect to
TOPIC = "twitter"             # filter by topic

# create Kafka Listener
class KafkaListener(StreamListener):
    def on_data(self, data):
        # forward each raw tweet to the Kafka topic
        producer.send_messages(TOPIC, data.encode('utf-8'))
        print(data)
        return True

    def on_error(self, status):
        print(status)

# define client
client = KafkaClient(LOCALHOST)
# define producer
producer = SimpleProducer(client)
# define listener
listener = KafkaListener()

# set stream & track HASHTAG (tweepy expects a list of track terms)
stream = Stream(authentication, listener)
stream.filter(track=[HASHTAG])
access_token_secret = "zoOzBdLYdHG0pP30UZM1znSt5RAWAp7OYrSSeG2Xe3I8N" consumer_key = "S3ZN012qzgK1IeR6yL5gnNFFX" consumer_secret = "EfpDZrGtfv5Aw8y687hMoOYpv3TzFwHhVDot5FTkuuCHfOC7hb" class FileWriteListener(StreamListener): def __init__(self): super(StreamListener, self).__init__() self.save_file = open('../data_California/tweets.json', 'w') self.tweets = [] def on_data(self, tweet): self.tweets.append(json.loads(tweet)) self.save_file.write(str(tweet)) def on_error(self, status): print(status) return True auth = OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) api = API(auth) twitter_stream = Stream(auth, FileWriteListener()) # Here you can filter the stream by: # - keywords (as shown) # - users twitter_stream.filter(track=['#California', '#california', '#LA'])