def test_sliding_company_score(redis_client, foo_company):
    '''
    The stored company score should be re-averaged after every synced tweet.

    Tweets 100 and 101 are written up front, then four more are synced with
    `max_tweets=4`. The timeline and the score are checked after each of the
    first three yields and once more after the sync has finished.
    '''
    for tweet_id, score in ((100, 0.1), (101, 0.2)):
        db.write_tweet(redis_client, foo_company, MockTweet(id=tweet_id), score)
    db.set_company_score(redis_client, foo_company, 0.15)

    # NOTE(review): the mock text presumably doubles as the sentiment score
    # -- confirm against MockTweet / the mocked scorer.
    incoming = [
        MockTweet(id=102, text='0.3'),
        MockTweet(id=103, text='-0.4'),
        MockTweet(id=104, text='0.4'),
        MockTweet(id=105, text='-0.5'),
    ]
    api = MockApi(*incoming)

    # expected (timeline, score) right after the 1st, 2nd and 3rd yield
    checkpoints = [
        # [100, 101, 102]
        ([b'100', b'101', b'102'], (0.1 + 0.2 + 0.3) / 3),
        ([b'100', b'101', b'102', b'103'], (0.1 + 0.2 + 0.3 - 0.4) / 4),
        ([b'101', b'102', b'103', b'104'], (0.2 + 0.3 - 0.4 + 0.4) / 4),
    ]

    syncing = sync_tweets(api, redis_client, foo_company, max_tweets=4)
    for step, _ in enumerate(syncing):
        if step >= len(checkpoints):
            # nothing is checked mid-way for the last tweet
            continue
        timeline, expected_score = checkpoints[step]
        assert db.get_tweets_timeline(
            redis_client, foo_company, withscores=False) == timeline
        assert db.get_company_score(
            redis_client, foo_company) == approx(expected_score)

    final_timeline = [b'102', b'103', b'104', b'105']
    assert db.get_tweets_timeline(
        redis_client, foo_company, withscores=False) == final_timeline
    expected_score = (0.3 + 0.4 - 0.4 - 0.5) / 4
    assert db.get_company_score(
        redis_client, foo_company) == approx(expected_score)
def sync_tweets(api, redis_client, account, limit=inf, max_tweets=inf):
    '''
    Fetch tweets mentioning `@account`, score them and write them to the db.

    This is a generator: after every stored tweet it yields a dict with the
    keys `account`, `tweet`, `score` and `removed`.

    api          -- passed through to `twitter.get_tweets_about_company`
    redis_client -- redis connection used for the reads and for the pipeline
    account      -- company account whose mentions are synced
    limit        -- stop once this many tweets have been stored; tweets with
                    a score of exactly 0.0 are skipped and do not count
    max_tweets   -- when the stored count is at least this value, the ids
                    returned by `db.get_tweets_timeline(pipe, account, 1,
                    withscores=False)` are removed together with the insert
                    (presumably the oldest tweet -- confirm in `db`)
    '''
    newest_known_id = db.get_max_tweet_id(redis_client, account)
    print('Started syncing tweets mentioning ', account,
          'starting from ', newest_known_id)

    stored = 0
    mentions = twitter.get_tweets_about_company(
        api, account, since_id=newest_known_id)
    for tweet in mentions:
        if stored >= limit:
            break

        score = get_sentiment_score(tweet.text, tweet.entities)
        if score == 0.0:
            # neutral tweets are neither stored nor counted
            continue

        with redis_client.pipeline() as pipe:
            # watch the tweet set, read the current state, and only then
            # queue the writes after multi()
            pipe.watch('tweetsabout:{0}'.format(account))
            stored_count = db.get_number_of_tweets(pipe, account)
            old_average = db.get_company_score(redis_client, account)
            if stored_count >= max_tweets:
                evicted = db.get_tweets_timeline(
                    pipe, account, 1, withscores=False)
            else:
                evicted = []
            evicted_total = sum(
                db.get_tweet_score(pipe, account, tweet_id)
                for tweet_id in evicted
            )

            pipe.multi()
            # batch-process insert operation
            db.write_tweet(pipe, account, tweet, score)
            db.remove_tweets(pipe, account, *evicted)
            # sliding re-calculation of the score: rebuild the old sum from
            # the average, swap the evicted scores for the new one, and
            # divide by the new number of tweets
            remaining = stored_count + 1 - len(evicted)
            new_average = (
                old_average * stored_count - evicted_total + score
            ) / remaining
            db.set_company_score(pipe, account, new_average)
            pipe.execute()

        stored += 1
        yield {
            'account': account,
            'tweet': tweet,
            'score': score,
            'removed': evicted,
        }
def test_positive_neagtive(redis_client, foo_company):
    ''' tweets '0.10' and '-0.10' should leave the company score at ~0 '''
    # NOTE(review): the mock text presumably doubles as the sentiment score
    # -- confirm against MockTweet / the mocked scorer.
    api = MockApi(
        MockTweet(id=0, text='0.10'),
        MockTweet(id=1, text='-0.10'),
    )

    for _ in sync_tweets(api, redis_client, foo_company):
        pass  # only the side effects of the sync matter here

    final_score = db.get_company_score(redis_client, foo_company)
    assert final_score == approx(0)
def test_average_score(redis_client, foo_company):
    ''' the company score should be the mean of all synced tweet scores '''
    api = MockApi(
        MockTweet(id=0, text='0.10'),
        MockTweet(id=1, text='0.20'),
        MockTweet(id=2, text='0.30'),
    )

    for _ in sync_tweets(api, redis_client, foo_company):
        pass  # drain the generator; the results are not inspected

    expected = (0.10 + 0.20 + 0.30) / 3
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == approx(expected)
def test_skip_neutral_tweets(redis_client, foo_company):
    ''' tweets scoring 0.0 should not be stored nor affect the score '''
    texts = ['0.0', '0.0', '0.1']
    api = MockApi(*[
        MockTweet(id=tweet_id, text=text)
        for tweet_id, text in enumerate(texts)
    ])

    for _ in sync_tweets(api, redis_client, foo_company):
        pass  # drain the generator; the results are not inspected

    stored_count = db.get_number_of_tweets(redis_client, foo_company)
    assert stored_count == 1
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == approx(0.1)
def get_companies_details():
    '''
    Return the JSON listing of companies together with their details.

    Companies with fewer than 500 stored tweets are left out. Each listed
    entry is whatever `db.get_company_details` returns, extended with the
    keys `number_of_tweets` and `score`.
    '''
    listing = []
    for account in db.get_companies(r):
        tweet_count = db.get_number_of_tweets(r, account)
        # TODO: Replace me with env constant
        if tweet_count >= 500:
            details = db.get_company_details(r, account)
            details['number_of_tweets'] = tweet_count
            details['score'] = db.get_company_score(r, account)
            listing.append(details)
    return jsonify({'companies': listing})
def test_limit_skip_neutral(redis_client, foo_company):
    ''' `limit` should count stored tweets only, not skipped neutral ones '''
    api = MockApi(
        MockTweet(id=0, text='0.1'),
        MockTweet(id=1, text='0.0'),  # should be ignored
        MockTweet(id=2, text='0.0'),  # should be ignored
        MockTweet(id=3, text='0.2'),
        MockTweet(id=4, text='0.2'),  # should be ignored
        MockTweet(id=5, text='0.0'),  # should be ignored
    )

    synced = sync_tweets(api, redis_client, foo_company, limit=2)
    for res in synced:
        print('res', res)

    stored_count = db.get_number_of_tweets(redis_client, foo_company)
    assert stored_count == 2
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == approx(0.15)
def test_max_tweets(redis_client, foo_company):
    ''' syncing past `max_tweets` should drop tweets to stay at the cap '''
    for tweet_id, score in ((100, 0.1), (101, 0.2)):
        db.write_tweet(redis_client, foo_company, MockTweet(id=tweet_id), score)
    db.set_company_score(redis_client, foo_company, 0.15)

    incoming = [
        MockTweet(id=102, text='0.3'),
        MockTweet(id=103, text='0.4'),
    ]
    api = MockApi(*incoming)
    for _ in sync_tweets(api, redis_client, foo_company, max_tweets=3):
        pass  # drain the generator; the results are not inspected

    stored_count = db.get_number_of_tweets(redis_client, foo_company)
    assert stored_count == 3

    expected_score = (0.2 + 0.3 + 0.4) / 3
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == approx(expected_score)

    timeline = db.get_tweets_timeline(
        redis_client, foo_company, withscores=False)
    assert timeline == [b'101', b'102', b'103']
def test_prior_tweets(redis_client, foo_company):
    ''' tweets stored before the sync should be part of the new average '''
    for tweet_id, score in ((100, 0.1), (101, 0.2)):
        db.write_tweet(redis_client, foo_company, MockTweet(id=tweet_id), score)
    db.set_company_score(redis_client, foo_company, 0.15)
    count_before = db.get_number_of_tweets(redis_client, foo_company)
    assert count_before == 2

    incoming = [
        MockTweet(id=102, text='0.3'),
        MockTweet(id=103, text='0.4'),
    ]
    api = MockApi(*incoming)
    for _ in sync_tweets(api, redis_client, foo_company):
        pass  # drain the generator; the results are not inspected

    count_after = db.get_number_of_tweets(redis_client, foo_company)
    assert count_after == 4

    expected_score = (0.1 + 0.2 + 0.3 + 0.4) / 4
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == approx(expected_score)
import sys

import redis

from lib import db

r = redis.StrictRedis(host='redis', decode_responses=True)

# Maintenance pass over every company: remove stored tweets whose score is
# exactly 0.0, then let `db.update_company_overall_score` refresh the
# company score. Progress is printed along the way.
for company in db.get_companies(r):
    try:
        print('company', company, 'has',
              db.get_number_of_tweets(r, company), 'tweets')
        print('removing unneccessary')
        for tweet_id in db.get_tweets_timeline(r, company, withscores=False):
            tweet_score = db.get_tweet_score(r, company, tweet_id)
            if tweet_score == 0.0:
                print('removing', tweet_id, tweet_score)
                db.remove_tweets(r, company, tweet_id)
        print('company', company, 'has',
              db.get_number_of_tweets(r, company), 'tweets')
        print('score before', db.get_company_score(r, company))
        db.update_company_overall_score(r, company)
        print('score after', db.get_company_score(r, company))
    except Exception as error:
        # Best effort: one failing company must not stop the whole run, but
        # the failure should leave a trace instead of vanishing silently.
        print('failed to process', company, repr(error), file=sys.stderr)
        # Drop the stored score key for this company, as the script did
        # before on any failure.
        r.delete('companyscore:{0}'.format(company))
def test_company_score(redis_client, foo_company):
    ''' a score written with set_company_score should be read back as-is '''
    db.set_company_score(redis_client, foo_company, 0.25)
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == 0.25
def test_empty_company_score(redis_client, foo_company):
    ''' a company whose score was never set should report a score of 0 '''
    stored_score = db.get_company_score(redis_client, foo_company)
    assert stored_score == 0