def botspy(dynamic):
    """Render the bot overview page ('botspy.html').

    Every query is restricted to users whose ``User.user_cap_perc`` is at
    least 43.0. Post-based queries are additionally limited to the window
    ``[stringtime(time_delta), str_today())`` on ``Post.created_at_dt``,
    where ``time_delta`` comes from the request's query string.

    Args:
        dynamic: Value supplied by the caller (presumably the URL rule);
            it is not used by this view.

    Returns:
        The result of ``render_template('botspy.html', ...)``.
    """
    # Minimum User.user_cap_perc for an account to be listed on this page.
    # NOTE(review): gf.botweather_chart() filters at 60.0 by default, so the
    # chart and the tables on this page use different cut-offs - confirm that
    # this is intended.
    bot_threshold = 43.0

    time_delta = request.args.get('time_delta')
    str_time_range = stringtime(time_delta)
    today = str_today()

    # Accounts above the threshold, ordered by number of posts in the window.
    most_active = (
        db.session.query(
            User.user_scrname, User.user_cap_perc,
            func.count(Post.post_id), User.user_id)
        .join(Post.user)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(User.user_id)
        .order_by(func.count(Post.post_id).desc())
        .all()
    )

    # Hashtags used by those accounts, most frequent first.
    bot_hashtags = (
        db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag))
        .join(Post.user).join(Post.hashtags)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(Hashtag.hashtag)
        .order_by(func.count(Hashtag.hashtag).desc())
        .all()
    )

    # Non-retweet posts by those accounts, highest retweet_count first.
    most_retweeted_tweets = (
        db.session.query(
            Post.post_id, Post.original_author_scrname,
            Post.retweet_count, Post.original_tweet_id, User.user_scrname,
            Post.tweet_html, Post.text, Post.original_text)
        .join(Post.user)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .filter(Post.is_retweet == 0)
        .order_by(Post.retweet_count.desc())
        .all()
    )

    # Districts attached to posts from those accounts, most frequent first.
    most_active_districts = (
        db.session.query(
            District.district_name, func.count(District.district_name))
        .join(Post.user).join(Post.districts)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(District.district_name)
        .order_by(func.count(District.district_name).desc())
        .all()
    )

    most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets)

    # Accounts above the threshold ordered by follower count (no time window).
    popular_bot = (
        db.session.query(
            User.user_scrname, User.user_followers, User.user_id)
        .filter(User.user_cap_perc >= bot_threshold)
        .group_by(User.user_id)
        .order_by(User.user_followers.desc())
        .all()
    )

    # AVG() over zero rows yields NULL/None; fall back to 0 rather than
    # letting int(None) raise a TypeError and break the whole page.
    avg_followers = (
        db.session.query(func.avg(User.user_followers))
        .filter(User.user_cap_perc >= bot_threshold)
        .scalar()
    )
    avg_bot = int(avg_followers) if avg_followers is not None else 0

    # Get botweather chart data from the graph_functions module.
    botchart = gf.botweather_chart()

    return render_template(
        'botspy.html',
        time_delta=time_delta,
        most_active=most_active,
        bot_hashtags=bot_hashtags,
        most_retweeted_tweets=most_retweeted_tweets,
        popular_bot=popular_bot,
        avg_bot=avg_bot,
        botchart=botchart,
        get_tweet=get_tweet,
        most_retweeted_tweet_list=most_retweeted_tweet_list,
        most_active_districts=most_active_districts,
    )
def botweather_chart(days=10, min_cap_perc=60.0):
    """Build the rows for the bot-post chart.

    For each of the last ``days`` windows, counts posts whose author has
    ``User.user_cap_perc >= min_cap_perc`` and whose ``Post.created_at``
    lies in ``(window start, window end]``. The first window ends at
    ``str_today()``; each subsequent window ends where the previous began.

    Args:
        days: Number of windows to chart (default 10, the original
            hard-coded value).
        min_cap_perc: Minimum ``User.user_cap_perc`` for a post's author to
            be counted (default 60.0, the original hard-coded value).

    Returns:
        A list whose first element is the header ``["Date", "No. of posts"]``
        followed by one ``[label, count]`` row per window, oldest first.
    """
    header = ["Date", "No. of posts"]
    rows = []

    # Start with str_today() as the end of the most recent window.
    end_date = str_today()

    for x in range(1, days + 1):
        # beg_date[0] is used as the window start in the query and
        # beg_date[1] as the row label (presumably a short display date).
        beg_date = get_beg_date(x)

        botpost_count = (
            db.session.query(func.count(Post.post_id))
            .join(Post.user)
            .filter(User.user_cap_perc >= min_cap_perc)
            .filter(Post.created_at > beg_date[0])
            .filter(Post.created_at <= end_date)
            .scalar()
        )
        rows.append([beg_date[1], botpost_count])

        # Move the end of the next window back to this window's start.
        end_date = beg_date[0]
        print('Finished with day -{}'.format(x))

    # Rows were collected newest-first; appending the header last and then
    # reversing puts the header on top and the oldest window right after it.
    rows.append(header)
    rows.reverse()
    print(rows)
    return rows
from app import app, db
from app.models import *
from sqlalchemy import func, Date
import pprint

# STRATEGY:
# mysql> DROP TABLE IF EXISTS my_summary_new, my_summary_old;
# mysql> CREATE TABLE my_summary_new LIKE my_summary;
# -- populate my_summary_new as desired
# mysql> RENAME TABLE my_summary TO my_summary_old, my_summary_new TO my_summary;

# Evaluated once, when this module is imported.
today = str_today()

# NOTE: adding dist_group = allcong, allsen, allraces
def fill_dist_activity(dist_group, time_delta, table, table_new, table_old):
    """Begin rebuilding a summary table following the STRATEGY comment above.

    The portion visible here opens a direct engine connection and drops
    ``table_new`` and ``table_old`` if they exist.

    NOTE(review): the body appears to stop after the DROP statement; the
    CREATE / populate / RENAME steps from the STRATEGY comment, and any use
    of ``time_delta`` and ``table``, are not visible in this view.

    Args:
        dist_group: "allcong" maps to dist_fig 1 and "allsen" to dist_fig 2.
        time_delta: Not used in the visible portion.
        table: Not used in the visible portion.
        table_new: Name of the table dropped first.
        table_old: Name of the table dropped second.
    """
    # NOTE(review): dist_fig is left unassigned for any other dist_group
    # value (including "allraces", which the note above this function
    # mentions) - confirm how later code handles that.
    if dist_group == "allcong":
        dist_fig = 1
    if dist_group == "allsen":
        dist_fig = 2

    # Create a connection for direct DB access.
    conn = db.engine.connect()
    print("connected")

    # Table names are interpolated straight into the SQL text, so callers
    # must only pass trusted identifiers here.
    conn.execute("DROP TABLE IF EXISTS {0}, {1};".format(table_new, table_old))
def scrname_chart(screen_name, days=10):
    """Build per-window counts of original posts and retweets for one user.

    Each window covers ``Post.created_at`` in ``(window start, window end]``.
    The first window ends at ``str_today()``; each subsequent window ends
    where the previous one began.

    Args:
        screen_name: Value matched against ``User.user_scrname``.
        days: Number of windows to chart (default 10, the original
            hard-coded value).

    Returns:
        A list of ``[label, original_post_count, retweet_count]`` rows,
        oldest window first. No header row is included (the original notes
        it is not necessary for the current double-y chart configuration).
    """
    def _count_posts(retweet_flag, window_start, window_end):
        # Number of this user's posts in the window with the given
        # Post.is_retweet value (0 = original post, 1 = retweet).
        return (
            db.session.query(func.count(Post.post_id))
            .join(Post.user)
            .filter(User.user_scrname == screen_name)
            .filter(Post.is_retweet == retweet_flag)
            .filter(Post.created_at > window_start)
            .filter(Post.created_at <= window_end)
            .scalar()
        )

    rows = []

    # Start with str_today() as the end of the most recent window.
    end_date = str_today()
    # Each row is labelled with the short date of the END of its window.
    # NOTE(review): the sibling chart builders label rows with the window
    # start (beg_date[1]) instead - confirm which labelling is intended.
    end_label = date.today().strftime('%b %d')

    for x in range(1, days + 1):
        beg_date = get_beg_date(x)
        rows.append([
            end_label,
            _count_posts(0, beg_date[0], end_date),
            _count_posts(1, beg_date[0], end_date),
        ])

        # Move the end of the next window back to this window's start.
        end_date = beg_date[0]
        end_label = beg_date[1]
        print('Finished with day -{}'.format(x))

    # Rows were collected newest-first; callers expect oldest-first.
    rows.reverse()
    return rows
def get_hash_rows(this_district):
    """Build the hashtag chart rows for one district and cache them.

    The header row comes from
    ``top_line_generic(this_district, var_type='hashtags')``; every entry
    after its first is treated as a hashtag. For each of ten windows
    (ending at ``str_today()`` and stepping back via ``get_beg_date``), one
    count query is issued per hashtag.

    The finished table is pickled and stored in ``District_graphs`` for
    today's reference date: an existing row is updated, otherwise a new one
    is inserted. Storing is best-effort: on failure the session is rolled
    back, the error is printed, and the rows are still returned.

    Args:
        this_district: Value matched against ``District.district_name``.

    Returns:
        A list whose first element is the header row, followed by one
        ``[label, count, count, ...]`` row per window, oldest first.
    """
    rows = []
    this_top_line = top_line_generic(this_district, var_type='hashtags')

    # Evaluate "today" once so the chart's end time and the cache key agree
    # even if the call straddles a date change.
    today = str_today()
    end_date = today

    for x in range(1, 11):
        # beg_date[0] is used as the window start in the query and
        # beg_date[1] as the row label (presumably a short display date).
        beg_date = get_beg_date(x)
        new_row = [beg_date[1]]

        # One count query per hashtag in the header row.
        for this_hashtag in this_top_line[1:]:
            date_hash_num = (
                db.session.query(func.count(Hashtag.hashtag))
                .join(Post.districts).join(Post.hashtags)
                .filter(District.district_name == this_district)
                .filter(Hashtag.hashtag == this_hashtag)
                .filter(Post.created_at > beg_date[0])
                .filter(Post.created_at <= end_date)
                .scalar()
            )
            new_row.append(date_hash_num)
            print('Finished with hashtag: {}'.format(this_hashtag))

        rows.append(new_row)
        # Move the end of the next window back to this window's start.
        end_date = beg_date[0]
        print('Finished with day -{}'.format(x))

    # Rows were collected newest-first; appending the header last and then
    # reversing puts the header on top and the oldest window right after it.
    rows.append(this_top_line)
    rows.reverse()
    rows_pickled = pickle.dumps(rows)

    # Update today's cached row for this district if present, else insert.
    cached = (
        db.session.query(District_graphs)
        .filter(District_graphs.reference_date == today)
        .filter(District_graphs.district_name == this_district)
        .first()
    )
    if cached is not None:
        cached.chart_rows = rows_pickled
    else:
        cached = District_graphs(today, this_district, rows_pickled)

    try:
        db.session.add(cached)
        db.session.commit()
    except Exception as err:
        # Caching is best-effort: keep the session usable and report the
        # failure instead of swallowing it silently.
        db.session.rollback()
        print('Could not store chart rows for {}: {}'.format(
            this_district, err))

    return rows
def get_hashrows_overview(distgroup):
    """Build the hashtag chart rows for a district group and cache them.

    The header row comes from
    ``top_line_all(distgroup=distgroup, var_type='hashtags')``; every entry
    after its first is treated as a hashtag. For each of ten windows
    (ending at ``str_today()`` and stepping back via ``get_beg_date``), one
    count query is issued per hashtag. For "allcong" and "allsen" the count
    is restricted to districts with ``District.dist_type`` 1 or 2
    respectively; for any other group no district restriction is applied.

    The finished table is pickled and stored in ``District_graphs`` under
    ``distgroup`` for today's reference date (update if present, insert
    otherwise). Storing is best-effort: on failure the session is rolled
    back and the error is printed.

    Args:
        distgroup: "allcong", "allsen" or "allraces".

    Returns:
        The chart rows (header first, then oldest window first). The
        original returned nothing; returning the rows matches
        ``get_hash_rows`` and does not affect callers that ignore the result.
    """
    rows = []
    this_top_line = top_line_all(distgroup=distgroup, var_type='hashtags')

    # Evaluate "today" once so the chart's end time and the cache key agree
    # even if the call straddles a date change.
    today = str_today()
    end_date = today

    # District.dist_type value per group; only consulted for the two groups
    # that restrict by district type.
    dist_fig = {"allcong": 1, "allsen": 2, "allraces": 3}.get(distgroup)
    restrict_by_type = distgroup in ("allcong", "allsen")

    for x in range(1, 11):
        # get_beg_date returns (midnight, display date) for x days back.
        beg_date = get_beg_date(x)
        new_row = [beg_date[1]]

        # One count query per hashtag in the header row.
        for this_hashtag in this_top_line[1:]:
            if restrict_by_type:
                date_hash_num = (
                    db.session.query(func.count(Hashtag.hashtag))
                    .join(Post.hashtags).join(Post.districts)
                    .filter(Hashtag.hashtag == this_hashtag)
                    .filter(District.dist_type == dist_fig)
                    .filter(Post.created_at > beg_date[0])
                    .filter(Post.created_at <= end_date)
                    .scalar()
                )
            else:
                date_hash_num = (
                    db.session.query(func.count(Hashtag.hashtag))
                    .join(Post.hashtags)
                    .filter(Hashtag.hashtag == this_hashtag)
                    .filter(Post.created_at > beg_date[0])
                    .filter(Post.created_at <= end_date)
                    .scalar()
                )
            new_row.append(date_hash_num)
            print('Finished with hashtag: {}'.format(this_hashtag))

        rows.append(new_row)
        # Move the end of the next window back to this window's start.
        end_date = beg_date[0]
        print('Finished with day -{}'.format(x))

    # Reverse so that the header is first, then earliest row to latest.
    rows.append(this_top_line)
    rows.reverse()
    rows_pickled = pickle.dumps(rows)

    # Update today's cached row for this group if present, else insert.
    cached = (
        db.session.query(District_graphs)
        .filter(District_graphs.reference_date == today)
        .filter(District_graphs.district_name == distgroup)
        .first()
    )
    if cached is not None:
        cached.chart_rows = rows_pickled
    else:
        cached = District_graphs(today, distgroup, rows_pickled)

    try:
        db.session.add(cached)
        db.session.commit()
    except Exception as err:
        # Caching is best-effort: keep the session usable and report the
        # failure instead of swallowing it silently.
        db.session.rollback()
        print('Could not store chart rows for {}: {}'.format(distgroup, err))

    return rows
def district(dynamic):
    """Render the page for a single district ('district.html').

    Post-based queries are limited to the window
    ``[stringtime(time_delta), str_today())`` on ``Post.created_at_dt``,
    where ``time_delta`` comes from the request's query string, and to
    posts joined to the district named ``dynamic``.

    Args:
        dynamic: Value matched against ``District.district_name``.

    Returns:
        The result of ``render_template('district.html', ...)``.
    """
    print('starting district {}'.format(dynamic))
    time_delta = request.args.get('time_delta')
    url = request.path
    str_time_range = stringtime(time_delta)
    # Computed inside the page call so it reflects the current day.
    today = str_today()

    # Most frequently used hashtags column.
    dist_hashes = (
        db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag))
        .join(Post.districts).join(Post.hashtags)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(Hashtag.hashtag)
        .order_by(func.count(Hashtag.hashtag).desc())
        .all()
    )
    print('got dist_hashes')

    # Most active tweeters column.
    top_tweeters = (
        db.session.query(
            User.user_scrname, func.count(User.user_scrname),
            User.user_cap_perc, User.user_id)
        .join(Post.user).join(Post.districts)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(User.user_id)
        .order_by(func.count(User.user_id).desc())
        .all()
    )
    print('got top_tweeters')

    # Most frequently retweeted users column.
    most_retweeted = (
        db.session.query(
            Post.original_author_scrname,
            func.count(Post.original_author_scrname))
        .join(Post.districts)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .filter(Post.original_author_scrname != "")
        .group_by(Post.original_author_scrname)
        .order_by(func.count(Post.original_author_scrname).desc())
        .all()
    )

    # For the top five most-retweeted users build
    # [name, retweet count, botscore] for the template. The botscore is the
    # author's User.user_cap_perc, or a placeholder string when the author
    # has no User row.
    most_retweeted_list = []
    for item in most_retweeted[0:5]:
        botscore = (
            db.session.query(User.user_cap_perc)
            .filter(User.user_scrname == item[0])
            .first()
        )
        most_retweeted_list.append([
            item[0],
            item[1],
            botscore[0] if botscore else "Not yet in database",
        ])
    print('got most_retweeted')
    print('got most_retweeted_list')

    # Tweets most retweeted in this time period, counted by actual
    # appearances of retweets in the window.
    most_retweeted_inperiod = (
        db.session.query(
            Post.original_tweet_id, func.count(Post.original_tweet_id))
        .join(Post.districts)
        .filter(Post.is_retweet == 1)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(Post.original_tweet_id, Post.original_author_scrname)
        .order_by(func.count(Post.original_tweet_id).desc())
        .all()
    )
    most_retweeted_inperiod_list = get_tweet_list_inperiod(
        most_retweeted_inperiod)
    print('got most_retweeted_list_inperiod')

    # Basic district info.
    dist_obj = (
        db.session.query(
            District.state_fullname, District.district,
            District.incumbent, District.incumbent_party,
            District.clinton_2016, District.trump_2016,
            District.dem_candidate, District.rep_candidate)
        .filter(District.district_name == dynamic)
        .first()
    )
    print('got dist_obj')

    # Use today's cached chart rows when present; otherwise build them
    # (gf.get_hash_rows also stores the result for later requests).
    hash_pickled = (
        db.session.query(District_graphs.chart_rows)
        .filter(District_graphs.reference_date == today)
        .filter(District_graphs.district_name == dynamic)
        .first()
    )
    if hash_pickled is not None:
        # NOTE(review): pickle.loads executes arbitrary code on crafted
        # input. This is only safe while District_graphs.chart_rows is
        # written exclusively by this application.
        hash_table_rows = pickle.loads(hash_pickled[0])
    else:
        hash_table_rows = gf.get_hash_rows(dynamic)
    print(hash_table_rows)
    print('got chart_rows')

    return render_template(
        'district.html',
        dynamic=dynamic,
        time_delta=time_delta,
        url=url,
        dist_hashes=dist_hashes,
        top_tweeters=top_tweeters,
        most_retweeted=most_retweeted,
        t_form=ChangeTimeForm(),
        get_tweet=get_tweet,
        dist_obj=dist_obj,
        test_insert=test_insert,
        distlist=distlist,
        hash_table_rows=hash_table_rows,
        most_retweeted_list=most_retweeted_list,
        most_retweeted_inperiod_list=most_retweeted_inperiod_list,
    )
def screen_name(dynamic):
    """Render the page for a single screen name.

    If no ``User`` row has ``user_scrname == dynamic``, 'doesnt_exist.html'
    is rendered immediately and none of the remaining queries run.
    Otherwise 'screen_name.html' is rendered. Windowed queries are limited
    to ``[stringtime(time_delta), str_today())`` on ``Post.created_at_dt``,
    where ``time_delta`` comes from the request's query string.

    Args:
        dynamic: Screen name to look up.

    Returns:
        The result of ``render_template(...)`` for one of the two templates.
    """
    print('starting screen name {}'.format(dynamic))

    # Look the user up first so an unknown screen name does not pay for the
    # queries and chart build below, whose results would be discarded.
    user_obj = db.session.query(User).filter(
        User.user_scrname == dynamic).first()
    if not user_obj:
        return render_template('doesnt_exist.html', dynamic=dynamic)

    time_delta = request.args.get('time_delta')
    url = request.path
    str_time_range = stringtime(time_delta)
    today = str_today()

    # Turn user_created into a datetime object for use with Moment.
    user_created_date = get_tweet_datetime(user_obj.user_created)

    # Hashtags used most frequently by this screen name.
    top_hashtags = (
        db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag))
        .join(Post.user).join(Post.hashtags)
        .filter(User.user_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(Hashtag.hashtag)
        .order_by(func.count(Hashtag.hashtag).desc())
        .all()
    )

    # Districts referenced most frequently by this screen name.
    top_districts = (
        db.session.query(
            District.district_name, func.count(District.district_name))
        .join(Post.districts).join(Post.user)
        .filter(User.user_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(District.district_name)
        .order_by(func.count(District.district_name).desc())
        .all()
    )

    # Who this user has most frequently retweeted in this time period.
    retweeted_users_period = (
        db.session.query(
            Post.original_author_scrname,
            func.count(Post.original_author_scrname))
        .join(Post.user)
        .filter(User.user_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .filter(Post.original_author_scrname != "")
        .group_by(Post.original_author_scrname)
        .order_by(func.count(Post.original_author_scrname).desc())
        .all()
    )

    # Who this user has most frequently retweeted overall (no time window).
    retweeted_users_total = (
        db.session.query(
            Post.original_author_scrname,
            func.count(Post.original_author_scrname))
        .join(Post.user)
        .filter(User.user_scrname == dynamic)
        .filter(Post.original_author_scrname != "")
        .group_by(Post.original_author_scrname)
        .order_by(func.count(Post.original_author_scrname).desc())
        .all()
    )

    # Who has retweeted this user the most in this time period.
    who_retweets = (
        db.session.query(User.user_scrname, func.count(User.user_scrname))
        .join(Post.user)
        .filter(Post.original_author_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(User.user_scrname)
        .order_by(func.count(User.user_scrname).desc())
        .all()
    )

    # Posts in the window that name this user as original author, highest
    # retweet_count first (the count reflects the original post).
    most_retweeted_tweets = (
        db.session.query(
            Post.post_id, Post.original_author_scrname,
            Post.retweet_count, Post.original_tweet_id, Post.tweet_html,
            Post.text, Post.original_text)
        .filter(Post.original_author_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .order_by(Post.retweet_count.desc())
        .all()
    )
    most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets)

    # Chart data: one [label, originals, retweets] row per day.
    chart_rows = gf.scrname_chart(dynamic)
    # Horizontal tick labels are the first item of every chart row.
    hticks = [row[0] for row in chart_rows]

    return render_template(
        'screen_name.html',
        t_form=ChangeTimeForm(),
        dynamic=dynamic,
        url=url,
        time_delta=time_delta,
        top_hashtags=top_hashtags,
        top_districts=top_districts,
        user_obj=user_obj,
        retweeted_users_period=retweeted_users_period,
        retweeted_users_total=retweeted_users_total,
        who_retweets=who_retweets,
        user_created_date=user_created_date,
        most_retweeted_tweets=most_retweeted_tweets,
        scrname_chart=chart_rows,
        hticks=hticks,
        get_tweet=get_tweet,
        most_retweeted_tweet_list=most_retweeted_tweet_list,
    )