コード例 #1
0
def botspy(dynamic):
    """Render the bot-activity overview page ('botspy.html').

    A user is treated as a bot here when ``User.user_cap_perc >= 43.0``.
    Post-based queries are limited to posts with
    ``str_time_range <= Post.created_at_dt < today``, where the lower bound
    comes from the ``time_delta`` request argument via ``stringtime``.

    Args:
        dynamic: Not used in the body; kept so the signature is unchanged
            for whatever registers this view.

    Returns:
        The result of ``render_template('botspy.html', ...)``.
    """
    time_delta = request.args.get('time_delta')
    str_time_range = stringtime(time_delta)
    today = str_today()

    # Single definition of the threshold used by every query below.
    bot_threshold = 43.0

    # Bot accounts ranked by number of posts in the window.
    most_active = (
        db.session.query(
            User.user_scrname,
            User.user_cap_perc,
            func.count(Post.post_id),
            User.user_id,
        )
        .join(Post.user)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(User.user_id)
        .order_by(func.count(Post.post_id).desc())
        .all()
    )

    # Hashtags ranked by how often bot accounts used them in the window.
    bot_hashtags = (
        db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag))
        .join(Post.user)
        .join(Post.hashtags)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(Hashtag.hashtag)
        .order_by(func.count(Hashtag.hashtag).desc())
        .all()
    )

    # Non-retweet posts by bot accounts, highest retweet_count first.
    most_retweeted_tweets = (
        db.session.query(
            Post.post_id,
            Post.original_author_scrname,
            Post.retweet_count,
            Post.original_tweet_id,
            User.user_scrname,
            Post.tweet_html,
            Post.text,
            Post.original_text,
        )
        .join(Post.user)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .filter(Post.is_retweet == 0)
        .order_by(Post.retweet_count.desc())
        .all()
    )

    # Districts ranked by number of bot posts referencing them.
    most_active_districts = (
        db.session.query(
            District.district_name,
            func.count(District.district_name),
        )
        .join(Post.user)
        .join(Post.districts)
        .filter(User.user_cap_perc >= bot_threshold)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(District.district_name)
        .order_by(func.count(District.district_name).desc())
        .all()
    )

    most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets)

    # Bot accounts ranked by follower count (not limited to the time window).
    popular_bot = (
        db.session.query(
            User.user_scrname,
            User.user_followers,
            User.user_id,
        )
        .filter(User.user_cap_perc >= bot_threshold)
        .group_by(User.user_id)
        .order_by(User.user_followers.desc())
        .all()
    )

    avg_bot_raw = (
        db.session.query(func.avg(User.user_followers))
        .filter(User.user_cap_perc >= bot_threshold)
        .first()
    )

    # AVG over zero matching rows is NULL, which arrives here as None;
    # int(None) would raise TypeError, so fall back to 0 in that case.
    if avg_bot_raw is not None and avg_bot_raw[0] is not None:
        avg_bot = int(avg_bot_raw[0])
    else:
        avg_bot = 0

    # Get botweather chart data from the graph_functions module
    botchart = gf.botweather_chart()

    return render_template(
        'botspy.html',
        time_delta=time_delta,
        most_active=most_active,
        bot_hashtags=bot_hashtags,
        most_retweeted_tweets=most_retweeted_tweets,
        popular_bot=popular_bot,
        avg_bot=avg_bot,
        botchart=botchart,
        get_tweet=get_tweet,
        most_retweeted_tweet_list=most_retweeted_tweet_list,
        most_active_districts=most_active_districts,
    )
コード例 #2
0
def botweather_chart(days=10, min_cap_perc=60.0):
    """Build the table of daily bot-post counts for the botweather chart.

    Starting from ``str_today()`` the function steps backwards one window
    at a time. For window ``x`` it counts posts whose author has
    ``User.user_cap_perc >= min_cap_perc`` and whose ``Post.created_at`` is
    in ``(get_beg_date(x)[0], end_date]``; the next window then ends where
    this one began.

    Args:
        days: Number of windows to report. Defaults to 10, the value that
            was previously hard-coded.
        min_cap_perc: Minimum ``User.user_cap_perc`` for a post's author to
            be counted. Defaults to 60.0, the value that was previously
            hard-coded.

    Returns:
        A list whose first element is the header row
        ``["Date", "No. of posts"]``, followed by one ``[label, count]``
        row per window, oldest window first.
    """
    this_top_line = ["Date", "No. of posts"]
    rows = []

    # The first (most recent) window ends at the value of str_today().
    end_date = str_today()

    for x in range(1, days + 1):
        # beg_date[0] is used as the window's lower bound and beg_date[1]
        # as its label. NOTE(review): presumably (midnight, display date)
        # for x days ago - confirm against get_beg_date.
        beg_date = get_beg_date(x)

        # Number of bot posts between beg_date and end_date
        date_botpost_count = (
            db.session.query(func.count(Post.post_id))
            .join(Post.user)
            .filter(User.user_cap_perc >= min_cap_perc)
            .filter(Post.created_at > beg_date[0])
            .filter(Post.created_at <= end_date)
            .first()
        )

        # Label the row with the start date so it names the full day.
        rows.append([beg_date[1], date_botpost_count[0]])

        # Move the end of the next window back to the start of this one.
        end_date = beg_date[0]
        print('Finished with day -{}'.format(x))

    # Rows were collected newest-first; append the header and flip so the
    # header comes first and the oldest day follows it.
    rows.append(this_top_line)
    rows.reverse()
    print(rows)
    return rows
コード例 #3
0
from app import app, db
from app.models import *
from sqlalchemy import func, Date
import pprint





# STRATEGY:
# mysql> DROP TABLE IF EXISTS my_summary_new, my_summary_old;
# mysql> CREATE TABLE my_summary_new LIKE my_summary;
# -- populate my_summary_new as desired
# mysql> RENAME TABLE my_summary TO my_summary_old, my_summary_new TO my_summary;

# Evaluated once, when this module is imported.
# NOTE(review): a long-running process will keep this value after the
# calendar day changes - confirm that is what users of `today` expect.
today = str_today()

#NOTE: adding dist_group = allcong, allsen, allraces

def fill_dist_activity(dist_group, time_delta, table, table_new, table_old):
    """Open a direct DB connection and drop the scratch summary tables.

    The visible body maps ``dist_group`` to a numeric code, connects through
    ``db.engine`` and issues ``DROP TABLE IF EXISTS`` for ``table_new`` and
    ``table_old``.

    NOTE(review): this definition looks cut off - ``time_delta``, ``table``
    and ``dist_fig`` are not used in the part shown here, ``dist_fig`` is
    left unbound for any group other than "allcong"/"allsen", and the
    connection is not closed in the visible code.

    Args:
        dist_group: "allcong" or "allsen" select codes 1 and 2.
        time_delta: Not used in the visible body.
        table: Not used in the visible body.
        table_new: Name of the first table to drop if it exists.
        table_old: Name of the second table to drop if it exists.
    """
    if dist_group == "allcong":
        dist_fig = 1
    elif dist_group == "allsen":
        dist_fig = 2

    # Direct connection for raw SQL, bypassing the ORM session.
    conn = db.engine.connect()
    print("connected")

    # Table names are interpolated into the statement text as-is.
    drop_statement = "DROP TABLE IF EXISTS {0}, {1};".format(
        table_new, table_old)
    conn.execute(drop_statement)
コード例 #4
0
def scrname_chart(screen_name):
    """Build per-day original-post and retweet counts for one screen name.

    Starting from ``str_today()`` the function steps backwards through ten
    windows. For window ``x`` it counts posts by ``screen_name`` whose
    ``Post.created_at`` is in ``(get_beg_date(x)[0], end_date]``, once with
    ``Post.is_retweet == 0`` and once with ``Post.is_retweet == 1``.

    Each row is labelled with ``get_beg_date(x)[1]``, the label of the
    window's start, which is what ``botweather_chart`` and ``get_hash_rows``
    use for the same windows. Previously the label was the window's end
    date, so this chart was shifted by one day relative to the others.

    Args:
        screen_name: Value matched against ``User.user_scrname``.

    Returns:
        A list of ten ``[label, original_post_count, retweet_count]`` rows,
        oldest window first. No header row is included (not needed by the
        current double-y chart configuration).
    """
    rows = []

    # The first (most recent) window ends at the value of str_today().
    end_date = str_today()

    # Loop through day-long periods, starting from today
    for x in range(1, 11):
        # beg_date[0] is the window's lower bound, beg_date[1] its label.
        # NOTE(review): presumably (midnight, display date) for x days ago
        # - confirm against get_beg_date.
        beg_date = get_beg_date(x)

        # Everything the two counts have in common: author and time window.
        posts_in_window = (
            db.session.query(func.count(Post.post_id))
            .join(Post.user)
            .filter(User.user_scrname == screen_name)
            .filter(Post.created_at > beg_date[0])
            .filter(Post.created_at <= end_date)
        )

        date_origpost_count = posts_in_window.filter(
            Post.is_retweet == 0).first()
        date_repost_count = posts_in_window.filter(
            Post.is_retweet == 1).first()

        rows.append(
            [beg_date[1], date_origpost_count[0], date_repost_count[0]])

        # Move the end of the next window back to the start of this one.
        end_date = beg_date[0]
        print('Finished with day -{}'.format(x))

    # Rows were collected newest-first; flip so the oldest day comes first.
    rows.reverse()

    return rows
コード例 #5
0
def get_hash_rows(this_district):
    """Build, store and return the per-day hashtag chart table for a district.

    The header row comes from ``top_line_generic(this_district,
    var_type='hashtags')``; every element after its first is treated as a
    hashtag to count. For each of ten windows stepping back from
    ``str_today()``, and for each of those hashtags, the function counts
    matching rows with ``Post.created_at`` in
    ``(get_beg_date(x)[0], end_date]``.

    The finished table is pickled and written to ``District_graphs``: the
    row for (``str_today()``, ``this_district``) is updated if it exists,
    otherwise a new one is added. Saving is best-effort - on failure the
    session is rolled back, the error is printed, and the rows are still
    returned.

    Args:
        this_district: Value matched against ``District.district_name``.

    Returns:
        A list whose first element is the header row, followed by one
        ``[label, count, count, ...]`` row per window, oldest first.
    """
    # Get top line (first row); everything after its first cell is a hashtag
    this_top_line = top_line_generic(this_district, var_type='hashtags')
    hashtags = this_top_line[1:]

    # Container for all rows
    rows = []

    # The first (most recent) window ends at the value of str_today().
    end_date = str_today()

    # Top level loop through dates
    for x in range(1, 11):
        # beg_date[0] is the window's lower bound, beg_date[1] its label.
        beg_date = get_beg_date(x)
        new_row = [beg_date[1]]

        # For each date, run a separate count query per hashtag
        for this_hashtag in hashtags:
            date_hash_num = (
                db.session.query(func.count(Hashtag.hashtag))
                .join(Post.districts)
                .join(Post.hashtags)
                .filter(District.district_name == this_district)
                .filter(Hashtag.hashtag == this_hashtag)
                .filter(Post.created_at > beg_date[0])
                .filter(Post.created_at <= end_date)
                .first()
            )

            new_row.append(date_hash_num[0])
            print('Finished with hashtag: {}'.format(this_hashtag))

        rows.append(new_row)

        # Move the end of the next window back to the start of this one.
        end_date = beg_date[0]
        print('Finished with day -{}'.format(x))

    # Rows were collected newest-first; append the header and flip so the
    # header comes first and the oldest day follows it.
    rows.append(this_top_line)
    rows.reverse()

    rows_pickled = pickle.dumps(rows)

    # Read the reference date once so the lookup and a possible insert
    # cannot disagree if the day changes in between.
    reference_date = str_today()

    # Get today's row for this district, if it already exists
    record = (
        db.session.query(District_graphs)
        .filter(District_graphs.reference_date == reference_date)
        .filter(District_graphs.district_name == this_district)
        .first()
    )

    if record is None:
        record = District_graphs(reference_date, this_district, rows_pickled)
    else:
        record.chart_rows = rows_pickled

    try:
        db.session.add(record)
        db.session.commit()
    except Exception as exc:
        # Best-effort save: keep the session usable and report the failure
        # instead of hiding it; the caller still receives the rows.
        db.session.rollback()
        print('Could not save chart rows for {}: {}'.format(
            this_district, exc))

    return rows
コード例 #6
0
def get_hashrows_overview(distgroup):
    """Build and store the per-day hashtag chart table for a district group.

    The header row comes from ``top_line_all(distgroup=distgroup,
    var_type='hashtags')``; every element after its first is treated as a
    hashtag to count. For each of ten windows stepping back from
    ``str_today()``, and for each of those hashtags, the function counts
    matching rows with ``Post.created_at`` in
    ``(get_beg_date(x)[0], end_date]``. For "allcong" and "allsen" the
    count is additionally limited to ``District.dist_type`` 1 or 2
    respectively; any other group is counted across all posts.

    The finished table is pickled and written to ``District_graphs`` under
    ``district_name == distgroup``: today's row is updated if it exists,
    otherwise a new one is added. Saving is best-effort - on failure the
    session is rolled back and the error is printed.

    Args:
        distgroup: Group key, e.g. "allcong", "allsen" or "allraces".

    Returns:
        None. The rows are only persisted, not returned.
    """
    # Get top line (first row); everything after its first cell is a hashtag
    this_top_line = top_line_all(distgroup=distgroup, var_type='hashtags')
    hashtags = this_top_line[1:]

    # District-type code to filter on; None means "no district filter".
    dist_fig = {"allcong": 1, "allsen": 2}.get(distgroup)

    # Container for all rows
    rows = []

    # The first (most recent) window ends at the value of str_today().
    end_date = str_today()

    # Top level loop through dates
    for x in range(1, 11):
        # Using x as time_delta, get beg date of -1, -2, etc ... days.
        # Function returns (midnight, display date).
        beg_date = get_beg_date(x)
        new_row = [beg_date[1]]

        # For each date, run a separate count query per hashtag
        for this_hashtag in hashtags:
            query = db.session.query(
                func.count(Hashtag.hashtag)).join(Post.hashtags)

            if dist_fig is not None:
                query = query.join(Post.districts).filter(
                    District.dist_type == dist_fig)

            date_hash_num = (
                query
                .filter(Hashtag.hashtag == this_hashtag)
                .filter(Post.created_at > beg_date[0])
                .filter(Post.created_at <= end_date)
                .first()
            )

            new_row.append(date_hash_num[0])
            print('Finished with hashtag: {}'.format(this_hashtag))

        rows.append(new_row)

        # Move the end of the next window back to the start of this one.
        end_date = beg_date[0]
        print('Finished with day -{}'.format(x))

    rows.append(this_top_line)

    # Reverse so that the header is first, then earliest row to latest
    rows.reverse()

    rows_pickled = pickle.dumps(rows)

    # Read the reference date once so the lookup and a possible insert
    # cannot disagree if the day changes in between.
    reference_date = str_today()

    # Get today's row for this group, if it already exists
    record = (
        db.session.query(District_graphs)
        .filter(District_graphs.reference_date == reference_date)
        .filter(District_graphs.district_name == distgroup)
        .first()
    )

    if record is None:
        record = District_graphs(reference_date, distgroup, rows_pickled)
    else:
        record.chart_rows = rows_pickled

    try:
        db.session.add(record)
        db.session.commit()
    except Exception as exc:
        # Best-effort save: keep the session usable and report the failure
        # instead of hiding it.
        db.session.rollback()
        print('Could not save chart rows for {}: {}'.format(distgroup, exc))
コード例 #7
0
def district(dynamic):
    """Render the detail page for one district ('district.html').

    Every post-based query is limited to posts linked to the district named
    ``dynamic`` with ``window_start <= Post.created_at_dt < today``, where
    ``window_start`` is ``stringtime`` applied to the ``time_delta`` request
    argument.

    Args:
        dynamic: Value matched against ``District.district_name``.

    Returns:
        The result of ``render_template('district.html', ...)``.
    """
    print('starting district {}'.format(dynamic))

    time_delta = request.args.get('time_delta')
    url = request.path

    window_start = stringtime(time_delta)
    # Computed inside the request so it reflects the day of the page call.
    today = str_today()

    # Most frequently used hashtags column
    dist_hashes = (
        db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag))
        .join(Post.districts)
        .join(Post.hashtags)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= window_start)
        .filter(Post.created_at_dt < today)
        .group_by(Hashtag.hashtag)
        .order_by(func.count(Hashtag.hashtag).desc())
        .all()
    )

    print('got dist_hashes')

    # Most active tweeters column
    top_tweeters = (
        db.session.query(
            User.user_scrname,
            func.count(User.user_scrname),
            User.user_cap_perc,
            User.user_id,
        )
        .join(Post.user)
        .join(Post.districts)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= window_start)
        .filter(Post.created_at_dt < today)
        .group_by(User.user_id)
        .order_by(func.count(User.user_id).desc())
        .all()
    )

    print('got top_tweeters')

    # Most frequently retweeted users column
    most_retweeted = (
        db.session.query(
            Post.original_author_scrname,
            func.count(Post.original_author_scrname),
        )
        .join(Post.districts)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= window_start)
        .filter(Post.created_at_dt < today)
        .filter(Post.original_author_scrname != "")
        .group_by(Post.original_author_scrname)
        .order_by(func.count(Post.original_author_scrname).desc())
        .all()
    )

    # For the top five most-retweeted users, build
    # [name, retweet count, botscore] entries for the template.
    most_retweeted_list = []
    for author, retweet_total in most_retweeted[:5]:
        # Botscore of the original author, if that user is stored
        score_row = (
            db.session.query(User.user_cap_perc)
            .filter(User.user_scrname == author)
            .first()
        )
        score = score_row[0] if score_row else "Not yet in database"
        most_retweeted_list.append([author, retweet_total, score])

    print('got most_retweeted')

    # An earlier variant ranked the period's posts by Post.retweet_count
    # (which may include retweets made before the period); the query below
    # counts retweets that actually appear in the period instead.

    print('got most_retweeted_list')

    # Tweets most retweeted in this time period (by counting appearances)
    most_retweeted_inperiod = (
        db.session.query(
            Post.original_tweet_id,
            func.count(Post.original_tweet_id),
        )
        .join(Post.districts)
        .filter(Post.is_retweet == 1)
        .filter(District.district_name == dynamic)
        .filter(Post.created_at_dt >= window_start)
        .filter(Post.created_at_dt < today)
        .group_by(Post.original_tweet_id, Post.original_author_scrname)
        .order_by(func.count(Post.original_tweet_id).desc())
        .all()
    )

    most_retweeted_inperiod_list = get_tweet_list_inperiod(
        most_retweeted_inperiod)

    print('got most_retweeted_list_inperiod')

    # Basic district columns for the info panel
    dist_obj = (
        db.session.query(
            District.state_fullname,
            District.district,
            District.incumbent,
            District.incumbent_party,
            District.clinton_2016,
            District.trump_2016,
            District.dem_candidate,
            District.rep_candidate,
        )
        .filter(District.district_name == dynamic)
        .first()
    )

    print('got dist_obj')

    # Use today's stored chart rows when present; otherwise build them now.
    hash_pickled = (
        db.session.query(District_graphs.chart_rows)
        .filter(District_graphs.reference_date == today)
        .filter(District_graphs.district_name == dynamic)
        .first()
    )

    if hash_pickled is None:
        hash_table_rows = gf.get_hash_rows(dynamic)
    else:
        # NOTE(review): pickle.loads is only safe while chart_rows is
        # written exclusively by this application - confirm.
        hash_table_rows = pickle.loads(hash_pickled[0])

    print(hash_table_rows)

    print('got chart_rows')

    context = dict(
        dynamic=dynamic,
        time_delta=time_delta,
        url=url,
        dist_hashes=dist_hashes,
        top_tweeters=top_tweeters,
        most_retweeted=most_retweeted,
        t_form=ChangeTimeForm(),
        get_tweet=get_tweet,
        dist_obj=dist_obj,
        test_insert=test_insert,
        distlist=distlist,
        hash_table_rows=hash_table_rows,
        most_retweeted_list=most_retweeted_list,
        most_retweeted_inperiod_list=most_retweeted_inperiod_list,
    )
    return render_template('district.html', **context)
コード例 #8
0
def screen_name(dynamic):
    """Render the detail page for one screen name.

    If no ``User`` row has ``user_scrname == dynamic`` the function renders
    'doesnt_exist.html' immediately, before running any of the aggregate
    queries or building the chart. Otherwise it gathers the data below and
    renders 'screen_name.html'.

    Queries marked "in the period" are limited to posts with
    ``str_time_range <= Post.created_at_dt < today``, where the lower bound
    is ``stringtime`` applied to the ``time_delta`` request argument.

    Args:
        dynamic: Screen name taken from the URL.

    Returns:
        The result of ``render_template`` for one of the two templates.
    """
    print('starting screen name {}'.format(dynamic))

    time_delta = request.args.get('time_delta')
    url = request.path
    str_time_range = stringtime(time_delta)

    today = str_today()

    user_obj = db.session.query(User).filter(
        User.user_scrname == dynamic).first()

    # Nothing below is shown for an unknown user, so skip all of it.
    if not user_obj:
        return render_template('doesnt_exist.html', dynamic=dynamic)

    # Turn user_created into datetime obj for use with Moment
    user_created_date = get_tweet_datetime(user_obj.user_created)

    # What hashtags are used most frequently by this screen name
    top_hashtags = (
        db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag))
        .join(Post.user)
        .join(Post.hashtags)
        .filter(User.user_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(Hashtag.hashtag)
        .order_by(func.count(Hashtag.hashtag).desc())
        .all()
    )

    # Which districts are referenced most frequently by screen name
    top_districts = (
        db.session.query(
            District.district_name,
            func.count(District.district_name),
        )
        .join(Post.districts)
        .join(Post.user)
        .filter(User.user_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(District.district_name)
        .order_by(func.count(District.district_name).desc())
        .all()
    )

    # Who has this user most frequently retweeted in the period?
    retweeted_users_period = (
        db.session.query(
            Post.original_author_scrname,
            func.count(Post.original_author_scrname),
        )
        .join(Post.user)
        .filter(User.user_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .filter(Post.original_author_scrname != "")
        .group_by(Post.original_author_scrname)
        .order_by(func.count(Post.original_author_scrname).desc())
        .all()
    )

    # Who has this user most frequently retweeted overall (no time limit)?
    retweeted_users_total = (
        db.session.query(
            Post.original_author_scrname,
            func.count(Post.original_author_scrname),
        )
        .join(Post.user)
        .filter(User.user_scrname == dynamic)
        .filter(Post.original_author_scrname != "")
        .group_by(Post.original_author_scrname)
        .order_by(func.count(Post.original_author_scrname).desc())
        .all()
    )

    # Who has retweeted this user the most in the period?
    who_retweets = (
        db.session.query(
            User.user_scrname,
            func.count(User.user_scrname),
        )
        .join(Post.user)
        .filter(Post.original_author_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .group_by(User.user_scrname)
        .order_by(func.count(User.user_scrname).desc())
        .all()
    )

    # Posts in the period that name this user as original author, ordered
    # by retweet_count (the count reflects the original post).
    most_retweeted_tweets = (
        db.session.query(
            Post.post_id,
            Post.original_author_scrname,
            Post.retweet_count,
            Post.original_tweet_id,
            Post.tweet_html,
            Post.text,
            Post.original_text,
        )
        .filter(Post.original_author_scrname == dynamic)
        .filter(Post.created_at_dt >= str_time_range)
        .filter(Post.created_at_dt < today)
        .order_by(Post.retweet_count.desc())
        .all()
    )

    # Helper turns the query rows into the list the template displays.
    most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets)

    # Get data for the double-y chart
    scrname_chart = gf.scrname_chart(dynamic)

    # Horizontal-axis ticks: the first cell of each of the first ten rows.
    hticks = [row[0] for row in scrname_chart[:10]]

    return render_template(
        'screen_name.html',
        t_form=ChangeTimeForm(),
        dynamic=dynamic,
        url=url,
        time_delta=time_delta,
        top_hashtags=top_hashtags,
        top_districts=top_districts,
        user_obj=user_obj,
        retweeted_users_period=retweeted_users_period,
        retweeted_users_total=retweeted_users_total,
        who_retweets=who_retweets,
        user_created_date=user_created_date,
        most_retweeted_tweets=most_retweeted_tweets,
        scrname_chart=scrname_chart,
        hticks=hticks,
        get_tweet=get_tweet,
        most_retweeted_tweet_list=most_retweeted_tweet_list,
    )