def botspy(dynamic): time_delta = request.args.get('time_delta') url = request.path str_time_range = stringtime(time_delta) today = str_today() most_active = db.session.query(User.user_scrname, User.user_cap_perc,\ func.count(Post.post_id), User.user_id).\ join(Post.user).\ filter(User.user_cap_perc >= 43.0).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(User.user_id).order_by(func.count(Post.post_id).desc()).all() bot_hashtags = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\ join(Post.user).join(Post.hashtags).\ filter(User.user_cap_perc >= 43.0).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Hashtag.hashtag).order_by(func.count(Hashtag.hashtag).desc()).all() most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \ Post.retweet_count, Post.original_tweet_id, User.user_scrname, Post.tweet_html,\ Post.text, Post.original_text).\ join(Post.user).\ filter(User.user_cap_perc >= 43.0).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ filter(Post.is_retweet == 0).\ order_by(Post.retweet_count.desc()).all() most_active_districts = db.session.query(District.district_name,\ func.count(District.district_name)).\ join(Post.user).join(Post.districts).\ filter(User.user_cap_perc >= 43.0).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(District.district_name).\ order_by(func.count(District.district_name).desc()).all() most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets) popular_bot = db.session.query(User.user_scrname, User.user_followers,\ User.user_id).\ filter(User.user_cap_perc >= 43.0).\ group_by(User.user_id).order_by(User.user_followers.desc()).all() avg_bot_raw = db.session.query(func.avg(User.user_followers)).\ filter(User.user_cap_perc >= 43.0).\ first() avg_bot = int(avg_bot_raw[0]) #Get botweather chart data from graph_functions modules botchart = gf.botweather_chart() return render_template('botspy.html', time_delta=time_delta,\ most_active=most_active, bot_hashtags=bot_hashtags, \ most_retweeted_tweets=most_retweeted_tweets, popular_bot=popular_bot,\ avg_bot=avg_bot, botchart=botchart, get_tweet=get_tweet, \ most_retweeted_tweet_list=most_retweeted_tweet_list,\ most_active_districts=most_active_districts)
def fill_hash_activity(dist_group, time_delta, table, table_new, table_old): if dist_group == "allcong": dist_fig = 1 if dist_group == "allsen": dist_fig = 2 #CREATE CONNECTION FOR DIRECT DB ACCESS conn = db.engine.connect() conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old)) if time_delta == 14: conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE hash_activity_{1}_1;'.format(table, dist_group)) conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table)) #conn.execute("DELETE FROM {0}".format(table_new)) #GET ORDERED LIST OF HASHTAGS AND ACTIVITY COUNT str_time_range = stringtime(time_delta) if dist_group == "allcong" or dist_group == "allsen": all_hashes = db.session.query(Hashtag.hash_id, Hashtag.hashtag,\ func.count(Hashtag.hash_id)).\ join(Post.hashtags).join(Post.districts).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ filter(District.dist_type==dist_fig).\ group_by(Hashtag.hash_id).order_by(func.count(Hashtag.hash_id).desc()).all() else: all_hashes = db.session.query(Hashtag.hash_id, Hashtag.hashtag,\ func.count(Hashtag.hash_id)).\ join(Post.hashtags).join(Post.districts).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Hashtag.hash_id).order_by(func.count(Hashtag.hash_id).desc()).all() #INSERT TOP 100 ITEMS INTO SHADOW TABLE (TABLE_NEW) counter = 1 for item in all_hashes[0:100]: conn.execute("INSERT INTO {0} VALUES ({1}, {2}, '{3}', {4});".\ format(table_new, counter, item[0], item[1], item[2])) counter += 1 #RENAME EXISTING (MAIN) TABLE TO OLD, RENAME SHADOW TO MAIN conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\ format(table, table_old, table_new, table)) conn.close()
def fill_retweeted_users(dist_group, time_delta, table, table_new, table_old): if dist_group == "allcong": dist_fig = 1 if dist_group == "allsen": dist_fig = 2 conn = db.engine.connect() conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old)) if time_delta == 14: conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE retweeted_users_1;'.format(table)) conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table)) #conn.execute("DELETE FROM {0}".format(table_new)) #GET ORDERED LIST OF HASHTAGS AND ACTIVITY COUNT str_time_range = stringtime(time_delta) if dist_group == "allcong" or dist_group == "allsen": retweeted_users = db.session.query(Post.original_author_scrname, \ func.count(Post.original_author_scrname)).\ join(Post.districts).\ filter(Post.original_author_scrname != "").filter(District.dist_type==dist_fig).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Post.original_author_scrname).order_by(func.count(Post.original_author_scrname).\ desc()).all() else: retweeted_users = db.session.query(Post.original_author_scrname, \ func.count(Post.original_author_scrname)).\ filter(Post.original_author_scrname != "").\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Post.original_author_scrname).order_by(func.count(Post.original_author_scrname).\ desc()).all() counter = 1 for item in retweeted_users[0:100]: conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', {3});".\ format(table_new, counter, item[0], item[1])) counter += 1 conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\ format(table, table_old, table_new, table)) conn.close()
def sen_cache(time_delta): str_time_range = stringtime(time_delta) with open('app/comp_races_parsed_sen.csv', 'r') as f: reader = csv.reader(f) for row in reader: #row_split = row.split(',') dynamic = row[0][1:6] print("District is {}".format(dynamic)) dist_hashes = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\ join(Post.districts).join(Post.hashtags).\ filter(District.district_name==dynamic).\ filter(Post.created_at >= str_time_range).\ group_by(Hashtag.hashtag).\ order_by(func.count(Hashtag.hashtag).desc()).all() print(len(dist_hashes)) top_tweeters = db.session.query(User.user_scrname, func.count(User.user_scrname),\ User.user_cap_perc, User.user_id).\ join(Post.user).join(Post.districts).\ filter(District.district_name==dynamic).\ filter(Post.created_at >= str_time_range).\ group_by(User.user_id).\ order_by(func.count(User.user_id).desc()).all() print(len(top_tweeters)) most_retweeted = db.session.query(Post.original_author_scrname, \ func.count(Post.original_author_scrname)).\ join(Post.districts).\ filter(District.district_name==dynamic).\ filter(Post.created_at >= str_time_range).\ filter(Post.original_author_scrname != "").\ group_by(Post.original_author_scrname).\ order_by(func.count(Post.original_author_scrname).desc()).all() print(len(most_retweeted))
def fill_retweeted_tweets(dist_group, time_delta, table, table_new, table_old): print("starting {}".format(table)) if dist_group == "allcong": dist_fig = 1 if dist_group == "allsen": dist_fig = 2 conn = db.engine.connect() conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old)) if time_delta == 14: conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE retweeted_tweets_1;'.format(table)) conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table)) #conn.execute("DELETE FROM {0}".format(table_new)) #Get uncleaned list of top retweeted tweet IDS str_time_range = stringtime(time_delta) # if dist_group == "allcong" or dist_group == "allsen": # most_retweeted_tweets = db.session.query(Post.post_id, Post.original_tweet_id, \ # Post.retweet_count, District.dist_type).\ # join(Post.districts).\ # filter(Post.created_at >= str_time_range).filter(District.dist_type==dist_fig).\ # order_by(Post.retweet_count.desc()).all() # # else: # most_retweeted_tweets = db.session.query(Post.post_id, Post.original_tweet_id, \ # Post.retweet_count, District.dist_type).\ # join(Post.districts).\ # filter(Post.created_at >= str_time_range).\ # order_by(Post.retweet_count.desc()).all() if dist_group == "allcong" or dist_group == "allsen": most_retweeted_tweets_inperiod = db.session.query(Post.original_tweet_id,\ func.count(Post.original_tweet_id)).\ join(Post.districts).\ filter(Post.is_retweet == 1).filter(District.dist_type==dist_fig).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Post.original_tweet_id).\ order_by(func.count(Post.original_tweet_id).desc()).all() else: most_retweeted_tweets_inperiod = db.session.query(Post.original_tweet_id,\ func.count(Post.original_tweet_id)).\ join(Post.districts).\ filter(Post.is_retweet == 1).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Post.original_tweet_id).\ order_by(func.count(Post.original_tweet_id).desc()).all() # Get cleaned list of tweet ids (no dupes, w/ district relevance) # List returns 20 tweets, with lists of following attributes: # [Post.post_id, relevant screen name, retweet_count, tweet_html (if exists), # original_tweet_id] # tweet_list = get_tweet_list_ids(most_retweeted_tweets) # #print(tweet_list[0]) # # # GET FULL LIST. RETURNS FOLLOWING ATTRIBUTE: # # [Post id, screen name, retweet count, botscore] # populated_list = populate_tweet_list(tweet_list) # Get list with attributes: # [post_id, user_scrname, post count, user_cap_perc, tweethtml] tweet_list_inperiod = get_tweet_list_inperiod(most_retweeted_tweets_inperiod) # pprint.pprint(populated_list[0]) counter = 1 for item in tweet_list_inperiod: x = '''INSERT INTO {table} VALUES ({counter}, '{post_id}', '{botscore}', '{name}', {retweets});'''.\ format(table=table_new, counter=counter, post_id=item[0], botscore=item[3], name=item[1], retweets=item[2]) print(x) conn.execute(x) counter += 1 conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\ format(table, table_old, table_new, table)) conn.close() print("finished with table: {}".format(table))
def fill_dist_activity(dist_group, time_delta, table, table_new, table_old): if dist_group == "allcong": dist_fig = 1 if dist_group == "allsen": dist_fig = 2 #CREATE CONNECTION FOR DIRECT DB ACCESS conn = db.engine.connect() print("connected") conn.execute("DROP TABLE IF EXISTS {0}, {1};".format(table_new, table_old)) print("dropped tables") if time_delta == 14: conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE dist_activity_{1}_1;'.format(table, dist_group)) print("created base table") conn.execute('CREATE TABLE {0} LIKE {1};'.format(table_new, table)) print("created table {}".format(table_new)) #conn.execute("DELETE FROM {0};".format(table_new)) #GET ORDERED LIST OF DISTRICTS AND ACTIVITY COUNT str_time_range = stringtime(time_delta) #If partial group congress or senate if dist_group == "allcong" or dist_group == "allsen": most_active = db.session.query(District.district_name,\ func.count(District.district_name)).\ join(Post.districts).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ filter(District.dist_type==dist_fig).\ group_by(District.district_name).\ order_by(func.count(District.district_name).desc()).all() #If all races else: most_active = db.session.query(District.district_name,\ func.count(District.district_name)).\ join(Post.districts).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(District.district_name).\ order_by(func.count(District.district_name).desc()).all() counter = 1 for item in most_active: conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', {3});".\ format(table_new, counter, item[0], item[1])) counter += 1 conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\ format(table, table_old, table_new, table)) conn.close()
def fill_top_tweeters(dist_group, time_delta, table, table_new, table_old): if dist_group == "allcong": dist_fig = 1 if dist_group == "allsen": dist_fig = 2 conn = db.engine.connect() conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old)) if time_delta == 14: conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE top_tweeters_1;'.format(table)) conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table)) #conn.execute("DELETE FROM {0}".format(table_new)) #GET ORDERED LIST OF HASHTAGS AND ACTIVITY COUNT str_time_range = stringtime(time_delta) if dist_group == "allcong" or dist_group == "allsen": top_tweeters = db.session.query(User.user_id, User.user_scrname,\ User.user_cap_perc, func.count(User.user_scrname)).\ join(Post.user).join(Post.districts).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ filter(District.dist_type==dist_fig).\ group_by(User.user_id).order_by(func.count(User.user_id).desc()).all() else: top_tweeters = db.session.query(User.user_id, User.user_scrname,\ User.user_cap_perc, func.count(User.user_scrname)).\ join(Post.user).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(User.user_id).order_by(func.count(User.user_id).desc()).all() #INSERT TOP 100 ITEMS INTO SHADOW TABLE (TABLE_NEW) counter = 1 for item in top_tweeters[0:100]: if item[2]: conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', '{3}', {4}, {5});".\ format(table_new, counter, item[0], item[1],\ item[2], item[3])) counter += 1 else: conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', '{3}', {4}, {5});".\ format(table_new, counter, item[0], item[1],\ '-1.0', item[3])) counter += 1 #RENAME EXISTING (MAIN) TABLE TO OLD, RENAME SHADOW TO MAIN conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\ format(table, table_old, table_new, table)) conn.close()
from app import app, db from app.models import * from app.tweepy_cred import * import botometer import time from decimal import Decimal, ROUND_HALF_UP from app.helpers import stringtime from sqlalchemy import Column, Integer, String, Float, func pass_list = ['immelza'] str_time_range = stringtime(14) #GET ALL USER OBJECTS, in order of posting volume last 14 days # users = db.session.query(User.user_scrname, func.count(User.user_scrname)).\ # join(Post.user).\ # filter(Post.created_at >= str_time_range).\ # group_by(User.user_scrname).\ # order_by(func.count(User.user_scrname).desc()).all() # users = db.session.query(User.user_scrname, func.count(User.user_scrname)).\ # join(Post.user).join(Post.districts).\ # filter(Post.created_at >= str_time_range).\ # filter(District.district_name == "TXSen").\ # group_by(User.user_scrname).\ # order_by(func.count(User.user_scrname).desc()).all()
def district(dynamic): print('starting district {}'.format(dynamic)) time_delta = request.args.get('time_delta') url = request.path str_time_range = stringtime(time_delta) today = str_today() #Set str_today within page call, so is correct (today) # NOTE: Possibly faster to do this w/i hash_pickled db lookup #today = datetime.combine(date.today(), datetime.min.time()) #datetime object for midnight #str_today = today.strftime("%Y-%m-%d %H:%M:%S") # string version of midnight # Most frequently used hashtags column dist_hashes = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\ join(Post.districts).join(Post.hashtags).\ filter(District.district_name==dynamic).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Hashtag.hashtag).\ order_by(func.count(Hashtag.hashtag).desc()).all() print('got dist_hashes') # Most active tweeters column top_tweeters = db.session.query(User.user_scrname, func.count(User.user_scrname),\ User.user_cap_perc, User.user_id).\ join(Post.user).join(Post.districts).\ filter(District.district_name==dynamic).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(User.user_id).\ order_by(func.count(User.user_id).desc()).all() print('got top_tweeters') # Most frequently retweeted users column ) most_retweeted = db.session.query(Post.original_author_scrname, \ func.count(Post.original_author_scrname)).\ join(Post.districts).\ filter(District.district_name==dynamic).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ filter(Post.original_author_scrname != "").\ group_by(Post.original_author_scrname).\ order_by(func.count(Post.original_author_scrname).desc()).all() #Get botscore for top five most-retweeted users, create list of [name, retweet #numbers, botscore] to send to template most_retweeted_list = [] for item in most_retweeted[0:5]: tweeter = [] tweeter.append(item[0]) tweeter.append(item[1]) # Get botscore for original authors botscore = db.session.query(User.user_cap_perc).\ filter(User.user_scrname==item[0]).first() if botscore: tweeter.append(botscore[0]) else: tweeter.append("Not yet in database") most_retweeted_list.append(tweeter) print('got most_retweeted') # Most retweeted tweets column # Gets list of tweets in time period, ordered by most-retweeted (NOTE: many # or most of these retweets may be previous to this period) # most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \ # Post.retweet_count, Post.original_tweet_id, User.user_scrname, Post.tweet_html, # Post.text, Post.original_text).\ # join(Post.districts).join(Post.user).\ # filter(District.district_name==dynamic).filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < str_today).\ # order_by(Post.retweet_count.desc()).all() # # # Use helper function to Get botscore for top five most-retweeted tweets, # # create list of [post_id, scrname, retweet count, botscore, post_html] # # most_retweeted_tweet_list = get_tweet_list(most_retweeted_tweets, dynamic) #most_retweeted_tweet_list_dated = get_tweet_list_dated(db_search_object, time_delta) print('got most_retweeted_list') # Get tweets most retweeted in this time period (by counting actual apperances) most_retweeted_inperiod = db.session.query(Post.original_tweet_id,\ func.count(Post.original_tweet_id)).\ join(Post.districts).\ filter(Post.is_retweet == 1).filter(District.district_name==dynamic).\ filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\ group_by(Post.original_tweet_id, Post.original_author_scrname).\ order_by(func.count(Post.original_tweet_id).desc()).all() most_retweeted_inperiod_list = get_tweet_list_inperiod( most_retweeted_inperiod) print('got most_retweeted_list_inperiod') #Get basic district object for district info dist_obj = db.session.query(District.state_fullname, District.district, \ District.incumbent, District.incumbent_party, District.clinton_2016, \ District.trump_2016, District.dem_candidate, District.rep_candidate).\ filter(District.district_name==dynamic).first() print('got dist_obj') #Using 3-day index for top-row hashtags to spotlight #hash_table_rows = gf.get_hash_rows(dynamic) hash_pickled = db.session.query(District_graphs.chart_rows).\ filter(District_graphs.reference_date==today).\ filter(District_graphs.district_name==dynamic).first() if hash_pickled != None: hash_table_rows = pickle.loads(hash_pickled[0]) else: hash_table_rows = gf.get_hash_rows(dynamic) print(hash_table_rows) print('got chart_rows') return render_template('district.html', dynamic=dynamic, time_delta=time_delta, \ url=url, dist_hashes=dist_hashes, top_tweeters=top_tweeters, \ most_retweeted=most_retweeted, \ t_form=ChangeTimeForm(), get_tweet=get_tweet, dist_obj=dist_obj, \ test_insert=test_insert, distlist=distlist, hash_table_rows=hash_table_rows,\ most_retweeted_list=most_retweeted_list, \ most_retweeted_inperiod_list=most_retweeted_inperiod_list)
def screen_name(dynamic): print('starting screen name {}'.format(dynamic)) time_delta = request.args.get('time_delta') url = request.path str_time_range = stringtime(time_delta) today = str_today() # What hashtags are used most frequently by this screen name top_hashtags = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\ join(Post.user).join(Post.hashtags).\ filter(User.user_scrname == dynamic).\ filter(Post.created_at_dt >= str_time_range).\ filter(Post.created_at_dt < today).\ group_by(Hashtag.hashtag).\ order_by(func.count(Hashtag.hashtag).desc()).all() # Which districts are referenced most frequently by screen name top_districts = db.session.query(District.district_name, \ func.count(District.district_name)).\ join(Post.districts).join(Post.user).\ filter(User.user_scrname == dynamic).\ filter(Post.created_at_dt >= str_time_range).\ filter(Post.created_at_dt < today).\ group_by(District.district_name).\ order_by(func.count(District.district_name).\ desc()).all() user_obj = db.session.query(User).filter( User.user_scrname == dynamic).first() #Turn user_created into datetime obj for use with Moment if user_obj: user_created_date = get_tweet_datetime(user_obj.user_created) # Who has this user most frequently retweeted in this time period? retweeted_users_period = db.session.query(Post.original_author_scrname, \ func.count(Post.original_author_scrname)).\ join(Post.user).\ filter(User.user_scrname == dynamic).\ filter(Post.created_at_dt >= str_time_range).\ filter(Post.created_at_dt < today).\ filter(Post.original_author_scrname != "").\ group_by(Post.original_author_scrname).\ order_by(func.count(Post.original_author_scrname).desc()).all() # Who has this user most frequently retweeted overall (in accessible db)? retweeted_users_total = db.session.query(Post.original_author_scrname, \ func.count(Post.original_author_scrname)).\ join(Post.user).\ filter(User.user_scrname == dynamic).\ filter(Post.original_author_scrname != "").\ group_by(Post.original_author_scrname).\ order_by(func.count(Post.original_author_scrname).desc()).all() # Who has retweeted this user the most? who_retweets = db.session.query(User.user_scrname, \ func.count(User.user_scrname)).\ join(Post.user).\ filter(Post.original_author_scrname == dynamic).\ filter(Post.created_at_dt >= str_time_range).\ filter(Post.created_at_dt < today).\ group_by(User.user_scrname).\ order_by(func.count(User.user_scrname).desc()).all() #Get top retweet (retweet count reflects original post_ NOTE: needs work on authors #idea: filtering by orig_author, only getting retweets, always with dynamic #screenname. Thus no User needed. Remove?) # All posts that are retweets, and have this user as original_author_scrname most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \ Post.retweet_count, Post.original_tweet_id, Post.tweet_html, Post.text, Post.original_text).\ filter(Post.original_author_scrname==dynamic).\ filter(Post.created_at_dt >= str_time_range).\ filter(Post.created_at_dt < today).\ order_by(Post.retweet_count.desc()).all() # Use helper function to Get botscore for top five most-retweeted tweets, # create list of [post_id, name, retweet numbers, botscore] most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets) # most_retweeted_inperiod = db.session.query(Post.original_tweet_id,\ # func.count(Post.original_tweet_id)).\ # join(Post.user).\ # filter(Post.is_retweet == 1).filter(User.user_name==dynamic).\ # filter(Post.created_at_dt >= str_time_range).\ # group_by(Post.original_tweet_id).\ # order_by(func.count(Post.original_tweet_id).desc()).all() # # most_retweeted_inperiod_list = get_tweet_list_inperiod(most_retweeted_inperiod) #Get data for double-7 chart scrname_chart = gf.scrname_chart(dynamic) hticks = [ scrname_chart[0][0], scrname_chart[1][0], scrname_chart[2][0], scrname_chart[3][0], scrname_chart[4][0], scrname_chart[5][0], scrname_chart[6][0], scrname_chart[7][0], scrname_chart[8][0], scrname_chart[9][0] ] if user_obj: return render_template('screen_name.html', t_form=ChangeTimeForm(), \ dynamic=dynamic, url=url, time_delta=time_delta, top_hashtags=top_hashtags, \ top_districts=top_districts, user_obj=user_obj, \ retweeted_users_period=retweeted_users_period, \ retweeted_users_total=retweeted_users_total, who_retweets=who_retweets,\ user_created_date=user_created_date, most_retweeted_tweets=most_retweeted_tweets,\ scrname_chart=scrname_chart, hticks=hticks, get_tweet=get_tweet, \ most_retweeted_tweet_list=most_retweeted_tweet_list) else: return render_template('doesnt_exist.html', dynamic=dynamic)
def overview(dynamic): print('starting district group {}'.format(dynamic)) time_delta = request.args.get('time_delta') url = request.path str_time_range = stringtime(time_delta) today = datetime.combine( date.today(), datetime.min.time()) #datetime object for midnight str_today = today.strftime( "%Y-%m-%d %H:%M:%S") # string version of midnight conn = db.engine.connect() # GET 10-day table data for hashtag chart from graph_functions module # RETURN THIS TO TEMPLATE # Get a desc-ordered list of all hashtags being used in all districts # Object returns (hashtag, count) all_hashes_result = conn.execute('SELECT hashtag, count FROM hash_activity_{0}_{1};'.\ format(dynamic, time_delta)).fetchall() #RETURN THIS TO TEMPLATE all_hashes = [] for item in all_hashes_result: all_hashes.append(item) print("got all_hashes") #Get a list of tophashtags without district names # RETURN THIS TO TEMPLATE hashes_no_dists = [] counter = 0 for item in all_hashes: if item[0] not in distlist: hashes_no_dists.append(item) counter += 1 if counter == 20: break # RETURN THIS TO TEMPLATE all_tweets = db.session.query(func.count(Post.post_id)).\ filter(Post.created_at_dt >= str_time_range).first() print("got all tweets") #Get list of most active districts active_dists_result = conn.execute('SELECT district_name, count FROM dist_activity_{0}_{1};'.\ format(dynamic, time_delta)).fetchall() # RETURN THIS TO TEMPLATE most_active = [] for item in active_dists_result: most_active.append(item) print("got most_active") # Get a desc-ordered list of top-volume Tweeters top_tweeters_result = conn.execute('SELECT user_scrname, count, user_cap_perc FROM top_tweeters_{0}_{1};'.\ format(dynamic, time_delta)).fetchall() # RETURN THIS TO TEMPLATE top_tweeters = [] for item in top_tweeters_result: new_row = [] new_row.append(item[0]) new_row.append(item[1]) if item[2] == -1.0: new_row.append("Not yet in database") else: new_row.append(item[2]) top_tweeters.append(new_row) print("got top_tweeters") retweeted_users_result = conn.execute('SELECT original_author_scrname, count FROM retweeted_users_{0}_{1};'.\ format(dynamic, time_delta)).fetchall() # RETURN THIS TO TEMPLATE retweeted_users = [] for item in retweeted_users_result: retweeted_users.append(item) print("got retweeted users") retweeted_tweets_result = conn.execute('SELECT post_id, original_poster, retweet_count, botscore FROM retweeted_tweets_{0}_{1};'.\ format(dynamic, time_delta)).fetchall() # RETURN THIS TO TEMPLATE most_retweeted_tweet_list = [] tweet_count = 0 # Each item (tuple) is post_id, poster, count, botscore. Still need HTML for item in retweeted_tweets_result: # if original_author_scrname is in skip list, skip it if item[1].lower() in skip_list: continue #create list with attributes holding_list = [] for attribute in item: holding_list.append(attribute) # Get Tweet HTML, add to list try: tweet_text = get_tweet(item[0]) holding_list.append(tweet_text) except: holding_list.append("Can't retrieve tweet") most_retweeted_tweet_list.append(holding_list) tweet_count += 1 if tweet_count == 5: break print("got most retweeted tweets") hash_pickled = db.session.query(District_graphs.chart_rows).\ filter(District_graphs.reference_date==str_today).\ filter(District_graphs.district_name==dynamic).first() if hash_pickled != None: hashtable_all = pickle.loads(hash_pickled[0]) else: hashtable_all = gf.get_hashrows_overview(dynamic) print("got hashtable") conn.close() return render_template('overview.html', t_form=ChangeTimeForm(), \ all_form=AllCongSearchForm(), dynamic=dynamic, time_delta=time_delta, \ url=url, all_hashes=all_hashes, hashes_no_dists=hashes_no_dists, \ hashtable_all=hashtable_all, all_tweets=all_tweets,\ most_retweeted_tweet_list=most_retweeted_tweet_list,\ top_tweeters=top_tweeters, retweeted_users=retweeted_users, most_active=most_active\ )
def hashtag(dynamic): print('starting hashtag {}'.format(dynamic)) time_delta = request.args.get('time_delta') url = request.path str_time_range = stringtime(time_delta) #Where is this hashtag used column top_districts=db.session.query(District.district_id, District.district_name, \ func.count(District.district_id), District.state_fullname, District.district).\ join(Post.districts).join(Post.hashtags).\ filter(Hashtag.hashtag == dynamic).filter(Post.created_at_dt >= str_time_range).\ group_by(District.district_id).order_by(func.count(District.district_id).\ desc()).all() print("got top districts") # "Who uses this hashtag" column top_users=db.session.query(User.user_scrname, func.count(User.user_scrname), \ User.user_cap_perc, User.user_id).\ join(Post.user).join(Post.hashtags).\ filter(Hashtag.hashtag == dynamic).filter(Post.created_at_dt >= str_time_range).\ group_by(User.user_id).\ order_by(func.count(User.user_id).desc()).all() print("got top users") # Data for positive/negative chart valences = db.session.query(Post.polarity_val, func.count(Post.polarity_val)).\ join(Post.hashtags).\ filter(Hashtag.hashtag == dynamic).filter(Post.created_at_dt >= str_time_range).\ group_by(Post.polarity_val).all() valences_datatable = [['Attitude', 'Number of Tweets']] for item in valences: valences_datatable.append([item[0], item[1]]) print("got valences") # # Most retweeted tweets column # most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \ # Post.retweet_count, Post.original_tweet_id, User.user_scrname, Post.tweet_html, # Post.text, Post.original_text).\ # join(Post.hashtags).join(Post.user).\ # filter(Hashtag.hashtag==dynamic).filter(Post.created_at_dt >= str_time_range).\ # order_by(Post.retweet_count.desc()).all() # # print("got most retweeted tweets") # # #Get botscore for top five most-retweeted tweets, create list of [post_id, name, # # retweet numbers, botscore] to send to template # most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets) # # print("got most retweeted tweets") # Get tweets most retweeted in this time period (by counting actual apperances) most_retweeted_inperiod = db.session.query(Post.original_tweet_id,\ func.count(Post.original_tweet_id)).\ join(Post.hashtags).\ filter(Post.is_retweet == 1).filter(Hashtag.hashtag==dynamic).\ filter(Post.created_at_dt >= str_time_range).\ group_by(Post.original_tweet_id).\ order_by(func.count(Post.original_tweet_id).desc()).all() most_retweeted_inperiod_list = get_tweet_list_inperiod( most_retweeted_inperiod) return render_template('hashtag.html', t_form=ChangeTimeForm(), url=url, \ dynamic=dynamic, time_delta=time_delta, top_districts=top_districts, \ top_users=top_users, valences=valences, valences_datatable=valences_datatable,\ get_tweet=get_tweet, most_retweeted_inperiod_list=most_retweeted_inperiod_list )