# Root of the tweetscan app on the deployment machine; every data file used
# by the helpers below lives underneath it.
_TWEETSCAN_DIR = "C:/Users/Poojan/Desktop/Projects/ETL Django/Tweenalyze/tweetscan"
# One sentiment label per line, appended while streaming.
_SENTIMENT_LABELS_PATH = _TWEETSCAN_DIR + "/outputs/twitter-out.txt"
# One tweet text per entry, appended while streaming.
_TWEET_TEXT_PATH = _TWEETSCAN_DIR + "/outputs/twitter-text-out.txt"


def _load_pickle(name):
    """Unpickle ``pickled_algos/<name>`` and return the stored object."""
    import pickle

    # NOTE(security): unpickling runs arbitrary code from the file; these
    # pickles must only ever come from a trusted source.
    with open(_TWEETSCAN_DIR + "/pickled_algos/" + name, "rb") as handle:
        return pickle.load(handle)


def _read_text(path):
    """Return the whole content of the UTF-8 text file at ``path``."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


def _build_sentiment_function():
    """Return ``sentiment(text) -> (label, confidence)`` backed by the pickled classifiers."""
    from statistics import mode

    from nltk.classify import ClassifierI
    from nltk.tokenize import word_tokenize

    class VoteClassifier(ClassifierI):
        """Majority vote over several already-trained classifiers."""

        def __init__(self, *classifiers):
            self._classifiers = classifiers

        def _votes(self, features):
            return [c.classify(features) for c in self._classifiers]

        def classify(self, features):
            # NOTE: four voters can tie; statistics.mode raises
            # StatisticsError on a tie before Python 3.8 and returns the
            # first mode encountered from 3.8 on.
            return mode(self._votes(features))

        def confidence(self, features):
            votes = self._votes(features)
            return votes.count(mode(votes)) / len(votes)

    word_features = _load_pickle("word_features5k.pickle")

    def find_features(document):
        words = word_tokenize(document)
        print(words)
        # Set membership keeps the per-feature lookup constant time.
        present = set(words)
        return {w: (w in present) for w in word_features}

    voted_classifier = VoteClassifier(
        _load_pickle("originalnaivebayes5k.pickle"),
        _load_pickle("MNB_classifier5k.pickle"),
        _load_pickle("BernoulliNB_classifier5k.pickle"),
        _load_pickle("LogisticRegression_classifier5k.pickle"),
    )

    def sentiment(text):
        print("inside sentiment")
        feats = find_features(text)
        return voted_classifier.classify(feats), voted_classifier.confidence(feats)

    print("After senitment function loc")
    return sentiment


def _collect_tweets(hashtag, sentiment, limit=5):
    """Stream English tweets matching ``hashtag`` and log the confidently classified ones.

    :param hashtag: The term passed to the stream's ``track`` filter.
    :param sentiment: Callable returning ``(label, confidence)`` for a text.
    :param limit: Number of processed tweets after which the stream is
        disconnected.
    """
    import json

    from tweepy import OAuthHandler
    from tweepy import Stream
    from tweepy.streaming import StreamListener

    # NOTE(security): API credentials are hard-coded in source. They are
    # kept unchanged here so behavior is preserved, but they should be
    # rotated and loaded from configuration instead.
    # consumer key, consumer secret, access token, access secret.
    ckey = "lookB9U9DovzE29uvPBm9OV03"
    csecret = "WJT4BPbyvWdEba3TfhRTBZZAw8JgHnj9bJGre4XOHvm0BOFs6o"
    atoken = "3255426194-tv415MxWQSZlB4kxq4SQBQXhqNMnF54kwmxTfy5"
    asecret = "MPHTzP2APFeu1o3mYMCLzt5EQFDo1oDNhqeoe5rEUqiRC"

    class listener(StreamListener):
        """Classifies each incoming tweet and stops the stream after ``limit`` tweets."""

        def __init__(self):
            super().__init__()
            self.counter = 0
            self.limit = limit

        def on_data(self, data):
            all_data = json.loads(data)
            tweet = all_data.get("text")
            if tweet is None:
                # A message without "text" is not a tweet (presumably a
                # limit or delete notice); skip it instead of raising
                # KeyError.
                return True

            sentiment_value, confidence = sentiment(tweet)  # Sentiment of Tweets
            print(tweet, sentiment_value, confidence, self.counter)
            self.counter += 1

            if confidence * 100 >= 80:
                # UTF-8 so emoji and non-Latin text cannot fail on a
                # platform whose default encoding is a legacy code page.
                with open(_TWEET_TEXT_PATH, "a", encoding="utf-8") as output:
                    output.write(tweet)
                    output.write('\n')
                with open(_SENTIMENT_LABELS_PATH, "a", encoding="utf-8") as output:
                    output.write(sentiment_value)
                    output.write('\n')

            if self.counter >= self.limit:
                twitterStream.disconnect()
            return True

        def on_error(self, status):
            print(status)
            # Returning False stops the stream; otherwise tweepy keeps
            # retrying and the web request that started it never returns.
            return False

    print("before twitterStream")
    # Truncate both logs so the charts only reflect this run.
    for path in (_SENTIMENT_LABELS_PATH, _TWEET_TEXT_PATH):
        with open(path, 'w'):
            pass

    auth = OAuthHandler(ckey, csecret)
    auth.set_access_token(atoken, asecret)
    print("after auth")

    twitterStream = Stream(auth, listener())
    # Runs in the calling thread until the listener disconnects the stream.
    twitterStream.filter(track=[hashtag], languages=["en"])


def _render_charts():
    """Draw the trend line, word cloud and pie chart from the two output logs."""
    import matplotlib.pyplot as plt
    from matplotlib import style
    from wordcloud import WordCloud

    plt.rcdefaults()
    style.use("ggplot")

    labels = _read_text(_SENTIMENT_LABELS_PATH).split('\n')

    # Running sentiment score: +1 per positive line, -1 per negative line.
    xar, yar = [], []
    score = 0
    for position, label in enumerate(labels, start=1):
        if "pos" in label:
            score += 1
        elif "neg" in label:
            score -= 1
        xar.append(position)
        yar.append(score)

    # Drawn once: a timer-driven animation never advances inside a single
    # request/response cycle.
    trend_fig = plt.figure()
    trend_ax = trend_fig.add_subplot(1, 1, 1)
    trend_ax.plot(xar, yar)
    # NOTE(review): this target is overwritten by the word cloud saved
    # below; presumably a separate file name was intended - confirm.
    trend_fig.savefig(_TWEETSCAN_DIR + "/static/wordcloud.jpg")
    plt.close(trend_fig)

    # wordcloud
    # NOTE(review): WordCloud.generate raises ValueError when the text has
    # no words, i.e. when no tweet reached the confidence threshold.
    read_tweet = _read_text(_TWEET_TEXT_PATH)
    wordcloud = WordCloud(
        width=800, height=500, max_font_size=100, random_state=21
    ).generate(read_tweet)
    cloud_fig = plt.figure(figsize=(10, 7))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis('off')
    plt.title('WordCloud')
    plt.tight_layout()
    plt.savefig(_TWEETSCAN_DIR + "/static/wordcloud.jpg")
    plt.close(cloud_fig)

    # piechart
    pos = labels.count('pos')
    neg = labels.count('neg')
    pie_fig, pie_ax = plt.subplots()
    pie_ax.pie(
        [pos, neg],
        explode=[0.1, 0],
        labels=['positive', 'negative'],
        colors=['#ff9999', '#66b3ff'],
        autopct='%1.1f%%',
        shadow=True,
        startangle=90,
    )
    pie_ax.axis('equal')
    plt.title('Sentiment Pie Chart')
    plt.tight_layout()
    plt.savefig(_TWEETSCAN_DIR + "/static/pie.jpg")
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(pie_fig)


def activityHome(request):
    """Show the hashtag form and, for a GET carrying ``hashtag``, analyse it.

    A valid POST redirects to ``/tweetscan/home`` with the submitted hashtag
    in the query string. A GET with a non-empty ``hashtag`` streams matching
    tweets, classifies them, regenerates the chart images and then renders
    ``home.html`` with the form and the hashtag. Every other request renders
    ``home.html`` with the form only.

    :param request: The incoming Django request.
    :returns: A redirect for a valid POST, otherwise the rendered page.
    """
    from urllib.parse import urlencode

    form = teamForm(request.POST or None)
    # Bound up front so an invalid POST still reaches ``render`` with a
    # context (and the form's validation errors).
    args = {'form': form}

    if request.method == 'POST':
        if form.is_valid():
            hashtag = request.POST.get('hashtag', '')
            # Encode the value: a raw '#' would start a URL fragment and
            # '&' would split the query string.
            return redirect('/tweetscan/home?' + urlencode({'hashtag': str(hashtag)}))
    else:
        hashtag_get = request.GET.get('hashtag')
        if hashtag_get:
            args['hashtag'] = hashtag_get
            _collect_tweets(hashtag_get, _build_sentiment_function())
            _render_charts()

    return render(request, 'home.html', args)
def bot(request, name):
    """Stream tweets for a search term and render them with an overall sentiment.

    The term is the ``reply`` field of a POST, or ``name`` for any other
    method. After the stream returns, each id in the module-level
    ``list_of_tweets`` is looked up over HTTP and its ``html`` field is
    collected. The summary sentence is chosen from the module-level
    ``pos_count``. ``list_of_tweets``, ``i`` and ``pos_count`` are reset
    before rendering.

    :param request: The incoming Django request.
    :param name: Search term used when the request is not a POST.
    :returns: The rendered ``stock/bot.html`` response.
    """
    global list_of_tweets
    global i
    global pos_count

    print("Hi")
    reply = request.POST.get('reply', '') if request.method == 'POST' else name
    print(reply)
    flag = True

    twitterStream = Stream(auth, listener())
    twitterStream.filter(track=[reply])
    print(list_of_tweets)

    # NOTE(review): the endpoint URL and the script ``src`` below are empty
    # in this file, presumably elided. With an empty URL ``requests.get``
    # raises MissingSchema - restore the real values before use.
    url = ""
    script_tag = "<script async src=\"\" charset=\"utf-8\"></script>"

    block = []
    for tweet_id in list_of_tweets:
        resp = requests.get(url=url, params=dict(id=tweet_id))
        data = resp.json()
        try:
            block.append(data["html"].replace(script_tag, ""))
        except (KeyError, TypeError, AttributeError):
            # Best effort: skip a response that has no usable "html" field.
            continue

    if pos_count > 5:
        sentiment = "The general sentiment of people is positive"
    elif pos_count > 3:
        sentiment = "The general sentiment of people is nuetral"
    else:
        sentiment = "The general sentiment of people is negative"

    # Reset the module-level accumulators for the next request.
    list_of_tweets = []
    i = 0
    pos_count = 0

    return render(request, 'stock/bot.html', {
        'reply': reply,
        'flag': flag,
        'block': block,
        'sentiment': sentiment
    })
class TweetFeels(object):
    """
    The controller.

    :param credentials: A list of your 4 credential components.
    :param tracking: A list of keywords to track.
    :param db: A sqlite database to store data. Will be created if it doesn't
               already exist. Will append if it exists.
    :ivar lang: A list of languages to include in tweet gathering.
    :ivar buffer_limit: When the number of tweets in the buffer hits this limit
                        all tweets in the buffer gets flushed to the database.
    :ivar connected: Tells you if TweetFeels is connected and listening to
                     Twitter.
    :ivar sentiment: The real-time sentiment score.
    :ivar binsize: The fixed observation interval between new sentiment
                   calculations. (default = 60 seconds)
    :ivar factor: The fall-off factor used in real-time sentiment calculation.
                  (default = 0.99)
    """
    # Factories are class attributes so they can be swapped out (e.g. in tests).
    _db_factory = (lambda db: TweetData(db))
    _listener_factory = (lambda ctrl: TweetListener(ctrl))
    _stream_factory = (lambda auth, listener: Stream(auth, listener))

    def __init__(self, credentials, tracking=None, db='feels.sqlite'):
        self._feels = TweetFeels._db_factory(db)
        _auth = OAuthHandler(credentials[0], credentials[1])
        _auth.set_access_token(credentials[2], credentials[3])
        self._listener = TweetFeels._listener_factory(self)
        self._stream = TweetFeels._stream_factory(_auth, self._listener)
        # A fresh list per instance; a shared ``[]`` default would leak
        # keywords between instances.
        self.tracking = [] if tracking is None else tracking
        self.lang = ['en']
        self._sentiment = Sentiment(0, 0, 0, 0)
        self._filter_level = 'low'
        self._bin_size = timedelta(seconds=60)
        self._latest_calc = self._feels.start
        self._tweet_buffer = deque()
        self.buffer_limit = 50
        self._factor = 0.99

    @property
    def binsize(self):
        return self._bin_size

    @binsize.setter
    def binsize(self, value):
        assert (isinstance(value, timedelta))
        if value != self._bin_size:
            # A different bin size invalidates the running calculation.
            self._latest_calc = self._feels.start
        self._bin_size = value

    @property
    def factor(self):
        return self._factor

    @factor.setter
    def factor(self, value):
        assert (value <= 1 and value > 0)
        self._latest_calc = self._feels.start
        self._factor = value

    @property
    def connected(self):
        return self._stream.running

    @property
    def sentiment(self):
        end = self._feels.end
        sentiments = self.sentiments(
            strt=self._latest_calc, end=end, delta_time=self._bin_size
        )
        # Exhaust the generator, keeping only its final value.
        last = deque(sentiments, maxlen=1)
        # The generator can finish without yielding (no bins in range);
        # fall back to the current value instead of failing on an unbound
        # name.
        return last[0] if last else self._sentiment

    def _async_flag_name(self):
        """
        Return the ``Stream.filter`` keyword that requests a background thread.

        ``async`` became a reserved word in Python 3.7, and tweepy renamed the
        parameter to ``is_async``. The old spelling is used only when the
        stream's ``filter`` still declares it.
        """
        import inspect

        try:
            params = inspect.signature(self._stream.filter).parameters
        except (TypeError, ValueError):
            return 'is_async'
        return 'async' if 'async' in params else 'is_async'

    def start(self, seconds=None, selfupdate=60):
        """
        Start listening to the stream.

        :param seconds: If you want to automatically disconnect after a certain
                        amount of time, pass the number of seconds into this
                        parameter.
        :param selfupdate: Number of seconds between auto-calculate.
        """
        def delayed_stop():
            time.sleep(seconds)
            print('Timer completed. Disconnecting now...')
            self.stop()

        def self_update():
            while self.connected:
                time.sleep(selfupdate)
                # Reading the property recomputes the sentiment as a side
                # effect.
                self.sentiment

        if not self.tracking:
            print('Nothing to track!')
        else:
            # ``filter_level`` is not passed here because of an upstream
            # tweepy 3.5.0 bug; ``on_data`` applies it instead.
            self._stream.filter(
                track=self.tracking, languages=self.lang,
                **{self._async_flag_name(): True}
            )

        if seconds is not None:
            t = Thread(target=delayed_stop)
            t.start()

        if selfupdate is not None and selfupdate > 0:
            t2 = Thread(target=self_update)
            t2.start()

    def stop(self):
        """
        Disconnect from the stream.

        Warning: Connecting and disconnecting too frequently will get you
        blacklisted by Twitter. Your connections should be long-lived.
        """
        self._stream.disconnect()

    def on_data(self, data):
        """
        Called by :class:`TweetListener` when new tweet data is received.

        Note: Due to an upstream bug in tweepy for python3, it cannot handle
        the `filter_level` parameter in the `Stream.filter` function.
        Therefore, we'll take care of it here. The problem has been identified
        and fixed by the tweepy team.

        :param data: The tweet data. Should be a single :class:`Tweet`.
        :type data: Tweet
        """
        filter_value = {'none': 0, 'low': 1, 'medium': 2}
        value = filter_value[data['filter_level']]

        if value >= filter_value[self._filter_level]:
            self._tweet_buffer.append(data)

            if len(self._tweet_buffer) > self.buffer_limit:
                # Flush on a separate thread.
                t = Thread(target=self.clear_buffer)
                t.start()

    def clear_buffer(self):
        """
        Pops all the tweets currently in the buffer and puts them into the db.
        """
        while True:
            try:
                # The insert calculates sentiment values
                self._feels.insert_tweet(self._tweet_buffer.popleft())
            except IndexError:
                break

    def on_error(self, status):
        """
        Called by :class:`TweetListener` when an error is received.

        Restarts listening with the default arguments of :meth:`start`, which
        also starts another self-update thread.
        """
        self.start()

    def sentiments(self, strt=None, end=None, delta_time=None, nans=False):
        """
        Provides a generator for sentiment values in ``delta_time`` increments.

        :param strt: The start time at which the generator yields a value. If
                     not provided, generator will start from beginning of your
                     dataset.
        :type strt: datetime
        :param end: The ending datetime of the series. If not provided,
                    generator will not stop until it reaches the end of your
                    dataset.
        :type end: datetime
        :param delta_time: The time length that each sentiment value represents.
                           If not provided, the generator will use the setting
                           configured by :class:`TweetFeels`.
        :type delta_time: timedelta
        :param nans: Determines if a nan will be yielded when no tweets are
                     observed within a bin.
        :type nans: boolean
        """
        beginning = self._feels.start

        if strt is None:
            self._latest_calc = beginning
            strt = beginning
        else:
            self._latest_calc = max(strt, self._feels.start)
        if end is None:
            end = self._feels.end
        if delta_time is None:
            delta_time = self._bin_size

        # get to the starting point
        if strt < self._latest_calc:
            self._sentiment = Sentiment(0, 0, 0, 0)
            b = self._feels.tweets_between(beginning, strt)
        else:
            b = self._feels.tweets_between(self._latest_calc, strt)

        self._sentiment = self.model_sentiment(
            b, self._sentiment, self._factor
        )
        self._latest_calc = strt

        # start yielding sentiment values
        end = min(end, self._feels.end)
        if self._latest_calc < end:
            bins = self._feels.fetchbin(
                start=self._latest_calc, end=end, binsize=delta_time,
                empty=nans
            )
            sentiment = deque()
            for b in bins:
                try:
                    # only save sentiment value if not the last element
                    self._sentiment = sentiment.popleft()
                except IndexError:
                    pass

                latest = self._sentiment
                if len(b) > 0:
                    latest = self.model_sentiment(
                        b, self._sentiment, self._factor
                    )
                sentiment.append(latest)
                self._latest_calc = b.start
                # Yield the latest element
                if len(b) == 0 and nans:
                    yield Sentiment(np.nan, b.influence, b.start, b.end)
                else:
                    yield sentiment[-1]
        else:
            # this only happens when strt >= end
            yield self._sentiment

    def model_sentiment(self, b, s, fo=0.99):
        """
        Defines the real-time sentiment model given a dataframe of tweets.

        :param b: A ``TweetBin`` to calculate the new sentiment value.
        :param s: The initial Sentiment to begin calculation.
        :param fo: Fall-off factor
        """
        df = b.df.loc[b.df.sentiment != 0]  # drop rows having 0 sentiment
        newval = s.value
        if (len(df) > 0):
            try:
                val = np.average(
                    df.sentiment, weights=df.followers_count + df.friends_count
                )
            except ZeroDivisionError:
                # All weights are zero, so there is no weighted average.
                val = 0
            newval = s.value * fo + val * (1 - fo)
        return Sentiment(newval, b.influence, b.start, b.end)
