def setUp(self):
    """Mock out every TweetFeels factory, then seed a real test database."""
    # Swap all external dependencies for MagicMock factories so TweetFeels
    # can be built without touching Twitter or a live stream.
    TweetFeels._db_factory = (lambda db: MagicMock())
    TweetFeels._auth_factory = (lambda cred: MagicMock())
    TweetFeels._listener_factory = (lambda ctrl: MagicMock())
    TweetFeels._stream_factory = (lambda auth, listener: MagicMock())
    self.tweets_data_path = 'test/sample.json'

    def raw_tweet(created_at, id_str, text):
        # Every fixture tweet shares the same author statistics.
        return {'created_at': created_at,
                'id_str': id_str,
                'text': text,
                'user': {'followers_count': '100',
                         'friends_count': '200',
                         'location': None}}

    self.tweets = [
        # sentiment value = 0
        raw_tweet('Sun Feb 19 09:14:18 +0000 2017', '833394296418082817',
                  'Tweetfeels is tremendous! Believe me. I know.'),
        # sentiment value = -0.7351
        raw_tweet('Sun Feb 21 18:14:19 +0000 2017', '833394296418082818',
                  'Fake news. Sad!'),
        # sentiment value = -0.5719
        raw_tweet('Sun Feb 21 19:14:20 +0000 2017', '833394296418082819',
                  'I hate it.'),
    ]
    self.mock_feels = TweetFeels('abcd')
    # Back the mocked controller with a real sqlite-based TweetData.
    self.feels_db = TweetData(file='./test/db.sqlite')
    self.mock_feels._feels = self.feels_db
    self.mock_tweets = [Tweet(raw) for raw in self.tweets]
    for tweet in self.mock_tweets:
        self.feels_db.insert_tweet(tweet)
    self.mock_feels.clear_buffer()
def setUp(self):
    """Create a throwaway sqlite TweetData plus three fixture tweets."""
    self.tweets_data_path = 'test/sample.json'
    self.db = './test.sqlite'
    self.feels_db = TweetData(self.db)

    # All three fixture tweets come from the same (synthetic) author.
    author = {'followers_count': '100', 'friends_count': '200',
              'location': None}
    self.tweets = [
        {'created_at': 'Sun Feb 19 19:14:18 +0000 2017',
         'id_str': '833394296418082817',
         'text': 'Tweetfeels is tremendous! Believe me. I know.',
         'user': dict(author)},  # sentiment value = 0
        {'created_at': 'Sun Feb 20 19:14:19 +0000 2017',
         'id_str': '833394296418082818',
         'text': 'Fake news. Sad!',
         'user': dict(author)},  # sentiment value = -0.7351
        {'created_at': 'Sun Feb 21 19:14:20 +0000 2017',
         'id_str': '833394296418082819',
         'text': 'I hate it.',
         'user': dict(author)},  # sentiment value = -0.5719
    ]
    self.mock_tweets = [Tweet(raw) for raw in self.tweets]
def __init__(self, credentials, tracking=None, db='feels.sqlite'):
    """Wire up the listener, database, auth and stream.

    :param credentials: A list of your 4 credential components
                        (consumer key/secret, access token/secret).
    :param tracking: A list of keywords to track (default: no keywords).
    :param db: Path of the sqlite database used to store tweets.
    """
    self._listener = TweetListener(self.on_data, self.on_error)
    self._feels = TweetData(db)
    _auth = OAuthHandler(credentials[0], credentials[1])
    _auth.set_access_token(credentials[2], credentials[3])
    self._stream = Stream(_auth, self._listener)
    # Fix: the original default `tracking=[]` was a shared mutable default —
    # every instance constructed without a list mutated the same object.
    self.tracking = [] if tracking is None else tracking
    self.lang = ['en']
    self._sentiment = 0          # running real-time sentiment score
    self._filter_level = 'low'   # minimum tweet filter_level accepted
    self.calc_every_n = 10       # recalc only once n tweets are queued
class Test_Data(unittest.TestCase):
    """Exercises basic TweetData persistence behaviour."""

    def setUp(self):
        self.tweets_data_path = 'test/sample.json'
        self.db = './test.sqlite'
        self.feels_db = TweetData(self.db)

    def tearDown(self):
        os.remove(self.db)

    def test_file_creation(self):
        # Constructing TweetData must create the sqlite file on disk.
        self.assertTrue(os.path.exists(self.db))

    def test_fields(self):
        fields = self.feels_db.fields
        self.assertTrue(isinstance(fields, tuple))
        self.assertTrue(len(fields) >= 11)

    def test_scrub(self):
        # scrub() must flatten arbitrary dicts into a string.
        scrubbed = self.feels_db.scrub({'a': 1, 'b': 2})
        self.assertTrue(isinstance(scrubbed, str))

    def test_data_operation(self):
        raw = {'created_at': 'Sun Feb 19 19:14:18 +0000 2017',
               'id_str': '833394296418082817',
               'text': 'All the feels!'}
        tweet = Tweet(raw)
        self.assertEqual(len(tweet.keys()), 3)
        self.feels_db.insert_tweet(tweet)
        df = self.feels_db.queue
        self.assertEqual(len(df), 1)
        # Simulate a sentiment calculation, then write the values back.
        df.sentiment = 0.9
        for row in df.itertuples():
            self.feels_db.update_tweet(
                {'id_str': row.id_str, 'sentiment': row.sentiment}
            )
        # Once scored, the tweet leaves the queue but stays in `all`.
        self.assertEqual(len(self.feels_db.queue), 0)
        self.assertEqual(len(self.feels_db.all), 1)
def test_buffer(self):
    """Tweets accumulate in the buffer and flush once the limit is passed."""
    feels = TweetFeels('abcd')
    feels.buffer_limit = 5
    feels._feels = TweetData(file='sample.sqlite')
    with open(self.tweets_data_path) as fh:
        stripped = (ln.rstrip() for ln in fh)
        raw_lines = [ln for ln in stripped if ln]
    # Three tweets stay buffered: the limit (5) has not been exceeded.
    for raw in raw_lines[:3]:
        feels.on_data(Tweet(json.loads(raw)))
    self.assertEqual(len(feels._tweet_buffer), 3)
    # Three more push past the limit and trigger the flush thread.
    for raw in raw_lines[3:6]:
        feels.on_data(Tweet(json.loads(raw)))
    time.sleep(1)  # wait for the background thread to drain the buffer
    self.assertEqual(len(feels._tweet_buffer), 0)
    frames = list(feels._feels.all)
    self.assertEqual(len(frames[0]), 6)
    os.remove('sample.sqlite')
class TweetFeels(object):
    """
    The controller.

    :param credentials: A list of your 4 credential components.
    :param tracking: A list of keywords to track.
    :param db: A sqlite database to store data. Will be created if it doesn't
               already exist. Will append if it exists.
    :ivar lang: A list of languages to include in tweet gathering.
    :ivar buffer_limit: When the number of tweets in the buffer hits this
                        limit all tweets in the buffer gets flushed to the
                        database.
    :ivar connected: Tells you if TweetFeels is connected and listening to
                     Twitter.
    :ivar sentiment: The real-time sentiment score.
    :ivar binsize: The fixed observation interval between new sentiment
                   calculations. (default = 60 seconds)
    :ivar factor: The fall-off factor used in real-time sentiment calculation.
                  (default = 0.99)
    """

    # Factory hooks: tests replace these with MagicMock factories so the
    # controller can be built without a real database, listener or stream.
    _db_factory = (lambda db: TweetData(db))
    _listener_factory = (lambda ctrl: TweetListener(ctrl))
    _stream_factory = (lambda auth, listener: Stream(auth, listener))

    def __init__(self, credentials, tracking=[], db='feels.sqlite'):
        # NOTE(review): `tracking=[]` is a shared mutable default — instances
        # constructed without an explicit list share one object.
        self._feels = TweetFeels._db_factory(db)
        _auth = OAuthHandler(credentials[0], credentials[1])
        _auth.set_access_token(credentials[2], credentials[3])
        self._listener = TweetFeels._listener_factory(self)
        self._stream = TweetFeels._stream_factory(_auth, self._listener)
        self.tracking = tracking
        self.lang = ['en']
        self._sentiment = 0          # latest computed sentiment value
        self._filter_level = 'low'   # minimum accepted tweet filter_level
        self._bin_size = timedelta(seconds=60)
        self._latest_calc = self._feels.start  # time of last calculation
        self._tweet_buffer = deque()           # staging area before db flush
        self.buffer_limit = 50
        self._factor = 0.99                    # sentiment fall-off factor

    @property
    def binsize(self):
        return self._bin_size

    @binsize.setter
    def binsize(self, value):
        # Changing the bin size invalidates all prior calculations, so the
        # calculation cursor rewinds to the start of the dataset.
        assert(isinstance(value, timedelta))
        if value != self._bin_size:
            self._latest_calc = self._feels.start
            self._bin_size = value

    @property
    def factor(self):
        return self._factor

    @factor.setter
    def factor(self, value):
        # Fall-off must lie in (0, 1]; a new factor also forces a full recalc.
        assert(value<=1 and value>0)
        self._latest_calc = self._feels.start
        self._factor = value

    @property
    def connected(self):
        return self._stream.running

    @property
    def sentiment(self):
        # Drain the sentiments generator up to the end of the dataset and
        # return the final (most recent) value.
        end = self._feels.end
        sentiments = self.sentiments(
            strt=self._latest_calc, end=end, delta_time=self._bin_size
            )
        ret = None
        for s in sentiments:
            ret = s
        return ret

    def start(self, seconds=None, selfupdate=60):
        """
        Start listening to the stream.

        :param seconds: If you want to automatically disconnect after a
                        certain amount of time, pass the number of seconds
                        into this parameter.
        :param selfupdate: Number of seconds between auto-calculate.
        """
        def delayed_stop():
            # Background timer: disconnect after `seconds`.
            time.sleep(seconds)
            print('Timer completed. Disconnecting now...')
            self.stop()

        def self_update():
            # Periodically touch the `sentiment` property for its side
            # effect of advancing the calculation.
            while self.connected:
                time.sleep(selfupdate)
                self.sentiment

        if len(self.tracking) == 0:
            print('Nothing to track!')
        else:
            # NOTE(review): `async` became a reserved keyword in Python 3.7;
            # this keyword argument only parses on older interpreters
            # (modern tweepy renamed it to `is_async`).
            self._stream.filter(
                track=self.tracking, languages=self.lang, async=True
                )
        # This does not work due to upstream bug in tweepy 3.5.0. They have
        # fixed it in https://github.com/tweepy/tweepy/pull/783
        # self._stream.filter(
        #     track=self.tracking, languages=self.lang, async=True,
        #     filter_level=self._filter_level
        #     )
        if seconds is not None:
            t = Thread(target=delayed_stop)
            t.start()
        if selfupdate is not None and selfupdate > 0:
            t2 = Thread(target=self_update)
            t2.start()

    def stop(self):
        """
        Disconnect from the stream.

        Warning: Connecting and disconnecting too frequently will get you
        blacklisted by Twitter. Your connections should be long-lived.
        """
        self._stream.disconnect()

    def on_data(self, data):
        """
        Called by :class:`TweetListener` when new tweet data is recieved.

        Note: Due to upstream bug in tweepy for python3, it cannot handle the
        `filter_level` parameter in the `Stream.filter` function. Therefore,
        we'll take care of it here. The problem has been identified and fixed
        by the tweepy team here: https://github.com/tweepy/tweepy/pull/783

        :param data: The tweet data. Should be a single :class:`Tweet`.
        :type data: Tweet
        """
        # Emulate Twitter's server-side filter_level: drop anything below
        # our configured threshold.
        filter_value = {'none': 0, 'low': 1, 'medium': 2}
        value = filter_value[data['filter_level']]
        if value >= filter_value[self._filter_level]:
            self._tweet_buffer.append(data)
            # Flush to db on a worker thread once the buffer overflows, so
            # the stream callback returns quickly.
            if len(self._tweet_buffer) > self.buffer_limit:
                t = Thread(target=self.clear_buffer)
                t.start()

    def clear_buffer(self):
        """
        Pops all the tweets currently in the buffer and puts them into the db.
        """
        while True:
            try:
                # The insert calculates sentiment values
                self._feels.insert_tweet(self._tweet_buffer.popleft())
            except IndexError:
                # Buffer drained.
                break

    def on_error(self, status):
        """
        Called by :class:`TweetListener` when an error is recieved.
        """
        # Simple recovery policy: reconnect by restarting the stream.
        self.start()

    def sentiments(self, strt=None, end=None, delta_time=None):
        """
        Provides a generator for sentiment values in ``delta_time``
        increments.

        :param strt: The start time at which the generator yields a value. If
                     not provided, the generator will start from the beginning
                     of your dataset.
        :type strt: datetime
        :param end: The ending datetime of the series. If not provided, the
                    generator will not stop until it reaches the end of your
                    dataset.
        :type end: datetime
        :param delta_time: The time length that each sentiment value
                           represents. If not provided, the generator will use
                           the setting configured by :class:`TweetFeels`.
        :type delta_time: timedelta
        """
        beginning = self._feels.start
        if strt is None:
            self._latest_calc = beginning
            strt = beginning
        else:
            self._latest_calc = max(strt, self._feels.start)
        if end is None:
            end = self._feels.end
        if delta_time is None:
            delta_time = self._bin_size

        # get to the starting point: if we are asked to start earlier than
        # the cursor, restart the model from zero; otherwise roll the model
        # forward from the cursor to `strt`.
        if strt < self._latest_calc:
            self._sentiment = 0
            df = self._feels.tweets_between(beginning, strt)
        else:
            df = self._feels.tweets_between(self._latest_calc, strt)
        self._sentiment = self.model_sentiment(
            df, self._sentiment, self._factor
            )
        self._latest_calc = strt

        # start yielding sentiment values
        end = min(end, self._feels.end)
        if self._latest_calc < end:
            dfs = self._feels.fetchbin(
                start=self._latest_calc, end=end, binsize=delta_time
                )
            # `sentiment` deliberately lags one bin behind so the cursor
            # (`_latest_calc`) never commits the still-open final bin.
            sentiment = deque()
            for df in dfs:
                try:
                    # only save sentiment value if not the last element
                    self._sentiment = sentiment.popleft()
                except IndexError:
                    pass
                sentiment.append(
                    self.model_sentiment(df[0], self._sentiment, self._factor)
                    )
                self._latest_calc = df[1]
                # Yield the latest element
                yield sentiment[-1]
        else:
            # this only happens when strt >= end
            yield self._sentiment

    def model_sentiment(self, df, s, fo=0.99):
        """
        Defines the real-time sentiment model given a dataframe of tweets.

        :param df: A tweets dataframe.
        :param s: The initial sentiment value to begin calculation.
        :param fo: Fall-off factor
        """
        df = df.loc[df.sentiment != 0]  # drop rows having 0 sentiment
        if(len(df)>0):
            try:
                # Weight each tweet's sentiment by the author's reach.
                val = np.average(
                    df.sentiment, weights=df.followers_count+df.friends_count
                    )
            except ZeroDivisionError:
                # All weights zero — treat the bin as neutral.
                val = 0
            # Exponential decay: old sentiment falls off by `fo` per bin.
            s = s*fo + val*(1-fo)
        return s
class Test_Data(unittest.TestCase):
    """Exercises TweetData persistence, binning and date bookkeeping."""

    def setUp(self):
        self.tweets_data_path = 'test/sample.json'
        self.db = './test.sqlite'
        self.feels_db = TweetData(self.db)
        self.tweets = [
            {
                'created_at': 'Sun Feb 19 19:14:18 +0000 2017',
                'id_str': '833394296418082817',
                'text': 'Tweetfeels is tremendous! Believe me. I know.',
                'user': {
                    'followers_count': '100',
                    'friends_count': '200',
                    'location': None
                }
            },  # sentiment value = 0
            {
                'created_at': 'Sun Feb 20 19:14:19 +0000 2017',
                'id_str': '833394296418082818',
                'text': 'Fake news. Sad!',
                'user': {
                    'followers_count': '200',
                    'friends_count': '200',
                    'location': None
                }
            },  # sentiment value = -0.7351
            {
                'created_at': 'Sun Feb 21 19:14:20 +0000 2017',
                'id_str': '833394296418082819',
                'text': 'I hate it.',
                'user': {
                    'followers_count': '200',
                    'friends_count': '200',
                    'location': None
                }
            }  # sentiment value = -0.5719
        ]
        self.mock_tweets = [Tweet(t) for t in self.tweets]

    def tearDown(self):
        os.remove(self.db)

    def _load_sample_tweets(self):
        # Helper: parse the sample capture, skipping non-tweet lines
        # (delete notices etc. raise KeyError inside Tweet).
        tweets = []
        with open(self.tweets_data_path) as tweets_file:
            lines = filter(None, (line.rstrip() for line in tweets_file))
            for line in lines:
                try:
                    tweets.append(Tweet(json.loads(line)))
                except KeyError:
                    pass
        return tweets

    def test_file_creation(self):
        # Constructing TweetData must create the sqlite file on disk.
        self.assertTrue(os.path.exists(self.db))

    def test_fields(self):
        f = self.feels_db.fields
        self.assertTrue(isinstance(f, tuple))
        self.assertTrue(len(f) >= 11)

    def test_start(self):
        self.assertTrue(isinstance(self.feels_db.start, datetime))

    def test_dates(self):
        for t in self.mock_tweets:
            self.feels_db.insert_tweet(t)
        self.assertEqual(len(self.feels_db.tweet_dates), 3)
        for t in self._load_sample_tweets():
            self.feels_db.insert_tweet(t)
        self.assertEqual(len(self.feels_db.tweet_dates), 105)
        df = self.feels_db.tweet_dates
        timebox = timedelta(seconds=60)
        second = timedelta(seconds=1)
        # Fix: pd.TimeGrouper was deprecated in pandas 0.21 and later
        # removed; pd.Grouper(freq=...) is the documented replacement and
        # groups on the DatetimeIndex the same way.
        df = df.groupby(pd.Grouper(freq=f'{int(timebox/second)}S')).size()
        df = df[df != 0]
        self.assertEqual(len(df), 3)
        self.assertEqual(df.iloc[0], 103)

    def test_fetch(self):
        for t in self._load_sample_tweets():
            self.feels_db.insert_tweet(t)
        for t in self.mock_tweets:
            self.feels_db.insert_tweet(t)
        it = self.feels_db.fetchbin(binsize=timedelta(minutes=30))
        cur = next(it)
        self.assertEqual(cur.end - cur.start, timedelta(minutes=30))
        self.assertEqual(len(cur), 103)
        cur = next(it)
        self.assertEqual(len(cur), 1)
        cur = next(it)
        self.assertEqual(len(cur), 1)

    def test_empty(self):
        # With empty=True, bins containing no tweets are still yielded.
        for t in self.mock_tweets:
            self.feels_db.insert_tweet(t)
        it = self.feels_db.fetchbin(binsize=timedelta(hours=12), empty=True)
        cur = next(it)
        self.assertEqual(len(cur), 1)
        cur = next(it)
        self.assertEqual(len(cur), 0)
        cur = next(it)
        self.assertEqual(len(cur), 1)
        cur = next(it)
        cur = next(it)
        self.assertEqual(len(cur), 1)

    def test_bin(self):
        # influence = sum of followers + friends of the tweets in the bin.
        for t in self.mock_tweets:
            self.feels_db.insert_tweet(t)
        it = self.feels_db.fetchbin(binsize=timedelta(hours=12), empty=True)
        cur = next(it)
        self.assertEqual(cur.influence, 300)
        cur = next(it)
        self.assertEqual(cur.influence, 0)
        cur = next(it)
        self.assertEqual(cur.influence, 400)
        cur = next(it)
        cur = next(it)
        self.assertEqual(cur.influence, 400)

    def test_data_operation(self):
        twt = {'created_at': 'Sun Feb 19 19:14:18 +0000 2017',
               'id_str': '833394296418082817',
               'text': 'All the feels!'}
        t = Tweet(twt)
        self.assertEqual(len(t.keys()), 7)
        self.feels_db.insert_tweet(t)
        b = self.feels_db.tweets_since(datetime.now())
        self.assertEqual(len(b), 0)
        b = self.feels_db.tweets_since(0)
        self.assertEqual(len(b), 1)
        # Simulate sentiment calculation and push values back to the db.
        b.df.sentiment = 0.9
        for row in b.df.itertuples():
            self.feels_db.update_tweet(
                {'id_str': row.id_str, 'sentiment': row.sentiment}
            )
        start = datetime(2017, 2, 17, 0, 0, 0)
        before = datetime(2017, 2, 18, 0, 0, 0)
        after = datetime(2017, 2, 20, 0, 0, 0)
        b = self.feels_db.tweets_between(start, before)
        self.assertEqual(len(b), 0)
        b = self.feels_db.tweets_between(start, after)
        self.assertEqual(len(b), 1)
class TweetFeels(object):
    """
    The controller.

    :param credentials: A list of your 4 credential components.
    :param tracking: A list of keywords to track.
    :param db: A sqlite database to store data. Will be created if it doesn't
               already exist. Will append if it exists.
    :ivar calc_every_n: Won't calculate new sentiment until there are n
                        records in the queue.
    :ivar lang: A list of languages to include in tweet gathering.
    """

    # Factory hooks: tests replace these with MagicMock factories so the
    # controller can be built without a real database, listener or stream.
    _db_factory = (lambda db: TweetData(db))
    _listener_factory = (lambda ctrl: TweetListener(ctrl))
    _stream_factory = (lambda auth, listener: Stream(auth, listener))

    def __init__(self, credentials, tracking=[], db='feels.sqlite'):
        # NOTE(review): `tracking=[]` is a shared mutable default — instances
        # constructed without an explicit list share one object.
        self._feels = TweetFeels._db_factory(db)
        _auth = OAuthHandler(credentials[0], credentials[1])
        _auth.set_access_token(credentials[2], credentials[3])
        self._listener = TweetFeels._listener_factory(self)
        self._stream = TweetFeels._stream_factory(_auth, self._listener)
        self.tracking = tracking
        self.lang = ['en']
        self._sentiment = 0          # running sentiment score
        self._filter_level = 'low'   # minimum accepted tweet filter_level
        self.calc_every_n = 10
        # Starts as 0 (epoch sentinel); replaced by a datetime after the
        # first calculation in the `sentiment` property.
        self._latest_calc = 0
        self._tweet_buffer = deque()  # staging area before db flush
        self.buffer_limit = 50

    def start(self, seconds=None):
        """Start listening to the stream; optionally auto-stop after
        `seconds`."""
        def delayed_stop():
            # Background timer: disconnect after `seconds`.
            time.sleep(seconds)
            print('Timer completed. Disconnecting now...')
            self.stop()

        if len(self.tracking) == 0:
            print('Nothing to track!')
        else:
            # NOTE(review): `async` became a reserved keyword in Python 3.7;
            # this keyword argument only parses on older interpreters
            # (modern tweepy renamed it to `is_async`).
            self._stream.filter(track=self.tracking, languages=self.lang,
                                async=True)
        # This does not work due to upstream bug in tweepy 3.5.0. They have
        # fixed it in https://github.com/tweepy/tweepy/pull/783
        # self._stream.filter(
        #     track=self.tracking, languages=self.lang, async=True,
        #     filter_level=self._filter_level
        #     )
        if seconds is not None:
            t = Thread(target=delayed_stop)
            t.start()

    def stop(self):
        """Disconnect from the stream."""
        self._stream.disconnect()

    def on_data(self, data):
        """
        Buffer an incoming tweet, flushing to the db past `buffer_limit`.

        Note: Due to upstream bug in tweepy for python3, it cannot handle
        the `filter_level` parameter in the `Stream.filter` function.
        Therefore, we'll take care of it here. The problem has been
        identified and fixed by the tweepy team here:
        https://github.com/tweepy/tweepy/pull/783
        """
        # Emulate Twitter's server-side filter_level: drop anything below
        # our configured threshold.
        filter_value = {'none': 0, 'low': 1, 'medium': 2}
        value = filter_value[data['filter_level']]
        if value >= filter_value[self._filter_level]:
            self._tweet_buffer.append(data)
            # Flush on a worker thread so the stream callback stays fast.
            if len(self._tweet_buffer) > self.buffer_limit:
                t = Thread(target=self.clear_buffer)
                t.start()

    def clear_buffer(self):
        """Pop every buffered tweet into the database."""
        while True:
            try:
                # The insert calculates sentiment values
                self._feels.insert_tweet(self._tweet_buffer.popleft())
            except IndexError:
                # Buffer drained.
                break

    def on_error(self, status):
        # Simple recovery policy: reconnect by restarting the stream.
        self.start()

    @property
    def connected(self):
        return self._stream.running

    @property
    def sentiment(self):
        """Advance and return the running sentiment score."""
        def avg_sentiment(df):
            # Weight each tweet's sentiment by the author's reach; a bin
            # whose weights sum to zero counts as neutral.
            avg = 0
            try:
                avg = np.average(df.sentiment,
                                 weights=df.followers_count +
                                 df.friends_count)
            except ZeroDivisionError:
                avg = 0
            return avg

        dfs = self._feels.tweets_since(self._latest_calc)
        for df in dfs:
            # Only recalculate once enough new records accumulated.
            if (len(df) > self.calc_every_n):
                df = df.loc[df.sentiment != 0]  # drop rows with 0 sentiment
                df = df.groupby('created_at')
                df = df.apply(avg_sentiment)
                df = df.sort_index()
                # NOTE(review): Series.iteritems() was removed in pandas 2.0
                # (use .items()); this code targets older pandas.
                for row in df.iteritems():
                    # Exponential decay with a fixed 0.99 fall-off.
                    self._sentiment = self._sentiment*0.99 + row[1]*0.01
                self._latest_calc = df.tail(1).index.to_pydatetime()[0]
        return self._sentiment
def setUp(self):
    """Point the fixture at the sample capture and a throwaway database."""
    self.db = './test.sqlite'
    self.tweets_data_path = 'test/sample.json'
    # Fresh TweetData per test; creates the sqlite file if missing.
    self.feels_db = TweetData(self.db)
class TweetFeels(object):
    """
    The controller.

    :param credentials: A list of your 4 credential components.
    :param tracking: A list of keywords to track.
    :param db: A sqlite database to store data. Will be created if it doesn't
               already exist. Will append if it exists.
    :ivar calc_every_n: Won't calculate new sentiment until there are n
                        records in the queue.
    :ivar lang: A list of languages to include in tweet gathering.
    """

    def __init__(self, credentials, tracking=[], db='feels.sqlite'):
        # NOTE(review): `tracking=[]` is a shared mutable default — instances
        # constructed without an explicit list share one object.
        self._listener = TweetListener(self.on_data, self.on_error)
        self._feels = TweetData(db)
        _auth = OAuthHandler(credentials[0], credentials[1])
        _auth.set_access_token(credentials[2], credentials[3])
        self._stream = Stream(_auth, self._listener)
        self.tracking = tracking
        self.lang = ['en']
        self._sentiment = 0          # running sentiment score
        self._filter_level = 'low'   # minimum accepted tweet filter_level
        self.calc_every_n = 10

    def start(self, seconds=None):
        """Start listening to the stream; optionally auto-stop after
        `seconds`."""
        def delayed_stop():
            # Background timer: disconnect after `seconds`.
            time.sleep(seconds)
            print('Timer completed. Disconnecting now...')
            self.stop()

        if len(self.tracking) == 0:
            print('Nothing to track!')
        else:
            # NOTE(review): `async` became a reserved keyword in Python 3.7;
            # this keyword argument only parses on older interpreters
            # (modern tweepy renamed it to `is_async`).
            self._stream.filter(track=self.tracking, languages=self.lang,
                                async=True)
        # This does not work due to upstream bug in tweepy 3.5.0. They have
        # fixed it in https://github.com/tweepy/tweepy/pull/783
        # self._stream.filter(
        #     track=self.tracking, languages=self.lang, async=True,
        #     filter_level=self._filter_level
        #     )
        if seconds is not None:
            t = Thread(target=delayed_stop)
            t.start()

    def stop(self):
        """Disconnect from the stream."""
        self._stream.disconnect()

    def on_data(self, data):
        """
        Insert an incoming tweet if it passes the filter level.

        Note: Due to upstream bug in tweepy for python3, it cannot handle
        the `filter_level` parameter in the `Stream.filter` function.
        Therefore, we'll take care of it here. The problem has been
        identified and fixed by the tweepy team here:
        https://github.com/tweepy/tweepy/pull/783
        """
        # Emulate Twitter's server-side filter_level: drop anything below
        # our configured threshold.
        filter_value = {'none': 0, 'low': 1, 'medium': 2}
        value = filter_value[data['filter_level']]
        if value >= filter_value[self._filter_level]:
            self._feels.insert_tweet(data)

    def on_error(self, status):
        # Errors are ignored in this revision.
        pass

    def _intensity(self, tweet):
        """Return VADER's compound polarity score for a cleaned tweet."""
        t = clean(tweet)
        return SentimentIntensityAnalyzer().polarity_scores(t)['compound']

    @property
    def sentiment(self):
        """Advance and return the running sentiment score."""
        df = self._feels.queue
        # Only recalculate once enough unscored records accumulated.
        if (len(df) > self.calc_every_n):
            # Score the queued tweets, then persist the results.
            df.sentiment = df.text.apply(self._intensity)
            for row in df.itertuples():
                self._feels.update_tweet({
                    'id_str': row.id_str, 'sentiment': row.sentiment
                    })
            df = df.loc[df.sentiment != 0]  # drop rows having 0 sentiment
            df = df.groupby('created_at')
            # NOTE(review): unlike later revisions, this weighted average is
            # not guarded — all-zero followers_count in a group raises
            # ZeroDivisionError.
            df = df.apply(
                lambda x: np.average(x.sentiment, weights=x.followers_count))
            df = df.sort_index()
            # NOTE(review): Series.iteritems() was removed in pandas 2.0
            # (use .items()); this code targets older pandas.
            for row in df.iteritems():
                # Exponential decay with a fixed 0.99 fall-off.
                self._sentiment = self._sentiment*0.99 + row[1]*0.01
        return self._sentiment
class Test_Feels(unittest.TestCase):
    """End-to-end tests of the TweetFeels controller against a real
    sqlite-backed TweetData, with all Twitter/streaming dependencies
    replaced by MagicMock factories."""

    def setUp(self):
        # Swap every external dependency for a MagicMock factory so
        # TweetFeels can be constructed without touching Twitter.
        TweetFeels._db_factory = (lambda db: MagicMock())
        TweetFeels._auth_factory = (lambda cred: MagicMock())
        TweetFeels._listener_factory = (lambda ctrl: MagicMock())
        TweetFeels._stream_factory = (lambda auth, listener: MagicMock())
        self.tweets_data_path = 'test/sample.json'
        self.tweets = [
            {'created_at': 'Sun Feb 19 09:14:18 +0000 2017',
             'id_str': '833394296418082817',
             'text': 'Tweetfeels is tremendous! Believe me. I know.',
             'user': {'followers_count': '100', 'friends_count': '200',
                      'location':None}
             },  # sentiment value = 0
            {'created_at': 'Sun Feb 21 18:14:19 +0000 2017',
             'id_str': '833394296418082818',
             'text': 'Fake news. Sad!',
             'user': {'followers_count': '100', 'friends_count': '200',
                      'location':None}
             },  # sentiment value = -0.7351
            {'created_at': 'Sun Feb 21 19:14:20 +0000 2017',
             'id_str': '833394296418082819',
             'text': 'I hate it.',
             'user': {'followers_count': '100', 'friends_count': '200',
                      'location':None}
             }  # sentiment value = -0.5719
        ]
        self.mock_feels = TweetFeels('abcd')
        # Back the mocked controller with a real sqlite-based TweetData.
        self.feels_db = TweetData(file='./test/db.sqlite')
        self.mock_feels._feels = self.feels_db
        self.mock_tweets = [Tweet(t) for t in self.tweets]
        for t in self.mock_tweets:
            self.feels_db.insert_tweet(t)
        self.mock_feels.clear_buffer()

    def tearDown(self):
        os.remove('./test/db.sqlite')

    def test_start(self):
        # No keywords -> the stream must never be started.
        mock_feels = TweetFeels("abcd")
        mock_feels.tracking = []
        mock_feels.start(selfupdate=0)
        mock_feels._stream.filter.assert_not_called()
        # With keywords -> exactly one filter() call.
        mock_feels.tracking = ['tsla']
        mock_feels.start(selfupdate=0)
        mock_feels._stream.filter.assert_called_once()

    def test_stop(self):
        mock_feels = TweetFeels("abcd")
        mock_feels.stop()
        mock_feels._stream.disconnect.assert_called_once()

    def test_on_data(self):
        # buffer_limit=0 forces an immediate flush on the first tweet.
        mock_feels = TweetFeels("abcd")
        mock_feels.buffer_limit = 0
        data = {'filter_level': 'low', 'text': 'test data'}
        mock_feels.on_data(data)
        mock_feels._feels.insert_tweet.assert_called_once()
        # test filtering levels
        mock_feels2 = TweetFeels("abcd")
        mock_feels2._filter_level = 'medium'
        mock_feels2.on_data(data)
        mock_feels2._feels.insert_tweet.assert_not_called()
        # test buffer limit. no inserts until we are over limit
        # NOTE(review): sets `filter_level` (no underscore) — the private
        # attribute is `_filter_level`; confirm this is intentional.
        mock_feels2.buffer_limit = 2
        mock_feels2.filter_level = 'low'
        mock_feels2.on_data(data)
        mock_feels2._feels.insert_tweet.assert_not_called()
        mock_feels2.on_data(data)
        mock_feels2.on_data(data)
        # NOTE(review): asserts on `mock_feels`, not `mock_feels2` — looks
        # like a typo; also the flush runs on a thread, so an assertion on
        # mock_feels2 could race without a short wait. Confirm intent.
        mock_feels._feels.insert_tweet.assert_called_once()

    def test_sentiment(self):
        # With no new tweets since _latest_calc, the stored sentiment is
        # returned unchanged.
        mock_feels = TweetFeels("abcd")
        mock_feels._feels.tweets_since = MagicMock(return_value=[])
        mock_feels._sentiment = Sentiment(0.5, 0, 0, 0)
        mock_feels._latest_calc = datetime(2017, 1, 1, 0, 0, 0)
        mock_feels._feels.start = datetime(2017, 1, 1, 0, 0, 0)
        mock_feels._feels.end = datetime(2017, 1, 1, 0, 0, 0)
        self.assertEqual(mock_feels.sentiment.value, 0.5)

    def test_buffer(self):
        # Tweets accumulate in the buffer and flush once the limit passes.
        mock_feels = TweetFeels('abcd')
        mock_feels.buffer_limit = 5
        feels_db = TweetData(file='sample.sqlite')
        mock_feels._feels = feels_db
        with open(self.tweets_data_path) as tweets_file:
            lines = list(filter(None, (line.rstrip()
                                       for line in tweets_file)))
        for line in lines[0:3]:
            t = Tweet(json.loads(line))
            mock_feels.on_data(t)
        self.assertEqual(len(mock_feels._tweet_buffer), 3)
        for line in lines[3:6]:
            t = Tweet(json.loads(line))
            mock_feels.on_data(t)
        time.sleep(1)  # this waits for items to finish popping off the buffer
        self.assertEqual(len(mock_feels._tweet_buffer), 0)
        dfs = [df for df in mock_feels._feels.all]
        self.assertEqual(len(dfs[0]), 6)
        os.remove('sample.sqlite')

    def test_sentiment_comprehensive(self):
        # Reproduce the model by hand with the default 0.99 fall-off.
        sentiment = 0.0
        for t in self.mock_tweets:
            if t['sentiment']!=0:
                sentiment = 0.99*sentiment + 0.01*t['sentiment']
        # calc = 0*0.99**2 + 0.01*0.99*-0.7531 + 0.01*-0.5719
        #      = -0.01299649
        self.mock_feels._latest_calc = self.mock_feels._feels.start
        self.assertTrue(np.isclose(self.mock_feels.sentiment.value,
                                   sentiment))
        # first observation is at 2017-2-19 19:14:18 and we are using default
        # 60 second bins, therefore the observation at 2017-2-21 19:14:20
        # will never get saved but will always be recalculated.
        self.assertEqual(self.mock_feels._latest_calc,
                         datetime(2017, 2, 21, 19, 14, 0))
        # repeat the calculation, nothing changes
        self.assertTrue(np.isclose(self.mock_feels.sentiment.value,
                                   sentiment))
        self.assertEqual(self.mock_feels._latest_calc,
                         datetime(2017, 2, 21, 19, 14, 0))
        self.assertEqual(self.mock_feels.sentiment.start,
                         self.mock_feels._latest_calc)

    def test_sentiment_factor(self):
        # A custom fall-off factor changes the decay weights.
        sentiment = 0.0
        self.mock_feels.factor = 0.75
        for t in self.mock_tweets:
            if t['sentiment']!=0:
                sentiment = 0.75*sentiment + 0.25*t['sentiment']
        # calc = 0*0.75**2 + 0.25*0.75*-0.7531 + 0.25*-0.5719
        #      = -0.28418125
        mock_sentiment = self.mock_feels.sentiment.value
        self.assertTrue(np.isclose(mock_sentiment, sentiment))

    def test_sentiment_binsize(self):
        # A 2.5-day bin puts the two scored tweets into one bin, so their
        # sentiments are averaged before the decay step.
        T = self.mock_tweets
        A = T[1]['sentiment']
        B = T[2]['sentiment']
        sentiment = 0.75*0 + 0.25*(A+B)/2
        self.mock_feels.factor = 0.75
        self.mock_feels.binsize = timedelta(days=2.5)
        mock_sentiment = self.mock_feels.sentiment.value
        self.assertTrue(np.isclose(mock_sentiment, sentiment))

    def test_nans(self):
        # With nans=True, empty bins yield NaN instead of carrying forward.
        sentiments = self.mock_feels.sentiments(
            delta_time=timedelta(hours=24), nans=True)
        s = next(sentiments)
        self.assertEqual(s.value, 0)
        s = next(sentiments)
        self.assertTrue(np.isnan(s.value))  # can return nans
        # does not affect current sentiment
        self.assertEqual(self.mock_feels._sentiment.value, 0)
        s = next(sentiments)
        self.assertTrue(s.value<0)

    def test_sentiments(self):
        start = datetime(2017, 2, 19, 0, 0, 0)
        dt = timedelta(minutes=30)
        sentiment = self.mock_feels.sentiments(strt=start, delta_time=dt)
        self.assertTrue(np.isclose(next(sentiment).value, 0))
        self.assertTrue(np.isclose(next(sentiment).value, -0.007351))
        self.assertTrue(np.isclose(next(sentiment).value, -0.01299649))
        # Drain the remainder of the generator so _latest_calc advances.
        for s in sentiment:
            print(s)
        # we are starting at 2017-2-19 19:00:00 and using bins with length
        # 30 minutes, therefore our latest calc will be just prior to the
        # final observation.
        self.assertEqual(self.mock_feels._latest_calc,
                         datetime(2017, 2, 21, 19, 0, 0))