def get_revisions_and_flagged_data(rev_ids, treatment_date, con):
    """Fetch dewiki revisions together with their flagged-revision data.

    For every revision listed in `rev_ids` the query returns the revision's
    page, namespace and timestamp, the `fr_timestamp` of the flaggedrevs row
    for that exact revision (missing when there is none), and `max_fr_ts`,
    the latest `fr_timestamp` on the same page that lies strictly before
    `treatment_date`.

    :param rev_ids: iterable of revision ids. Each id is coerced with `int()`
        so that only integer literals can end up in the SQL text. When it is
        empty the query is run with `in (null)`, which matches no rows.
    :param treatment_date: cut-off date, converted with `to_wmftimestamp`.
    :param con: database connection; this function switches it to `dewiki_p`.
    :return: DataFrame with the columns rev_id, rev_page, page_namespace,
        rev_timestamp, fr_timestamp and max_fr_ts. The three timestamp
        columns are passed through `from_wmftimestamp`.
    :raises ValueError: if an id cannot be interpreted as an integer
        (TypeError for values `int()` does not accept at all).
    """
    # The id list cannot be bound as a single parameter, so it is rendered
    # into the statement. Going through int() guarantees nothing but integer
    # literals is spliced in.
    id_literals = [str(int(rev_id)) for rev_id in rev_ids]
    rev_ids_str = ','.join(id_literals) if id_literals else 'null'

    rev_flag_sql = """
    select rev_id,
            rev_page,
            page_namespace,
            rev_timestamp,
            fr_timestamp,
            (select max(fr_timestamp) from flaggedrevs where fr_page_id=rev_page and fr_timestamp < :treatment_date) max_fr_ts
        from (
              select rev_id, rev_page, rev_timestamp, page_namespace from revision_userindex
                join page on page_id = rev_page where rev_id in ({rev_ids_str}) ) auser
        left join flaggedrevs on
            fr_page_id = rev_page and
            fr_rev_id = rev_id;
                """.format(rev_ids_str=rev_ids_str)
    rev_flag_params = {'treatment_date': to_wmftimestamp(treatment_date)}

    con.execute('use dewiki_p;')
    rev_flag = pd.read_sql(sqlalchemy.text(rev_flag_sql),
                           con,
                           params=rev_flag_params)

    # Convert the raw WMF timestamps, in the same order as before.
    for ts_col in ('fr_timestamp', 'max_fr_ts', 'rev_timestamp'):
        rev_flag[ts_col] = rev_flag[ts_col].apply(from_wmftimestamp)
    return rev_flag
def get_active_users(lang, start_date, end_date, min_rev_id, wmf_con):
    """
    Return the account details of users who edited `lang`wiki in a date range.

    A user is included when they have at least one row in `revision` whose
    rev_timestamp lies between start_date and end_date (both inclusive) and
    whose rev_id is greater than `min_rev_id`.

    :param lang: wiki language code; the connection is switched to `{lang}wiki_p`.
    :param start_date: lower bound, converted with `to_wmftimestamp` and `int`.
    :param end_date: upper bound, converted the same way.
    :param min_rev_id: only revisions with a larger rev_id are considered.
    :param wmf_con: database connection.
    :return: DataFrame with user_id, user_name, user_registration and
        live_edit_count (the user's `user_editcount`).
    """
    wmf_con.execute(f'use {lang}wiki_p;')

    query = sqlalchemy.text(
        """select user_id, user_name, user_registration, user_editcount as live_edit_count
                        from (select distinct(rev_user) from revision 
                            where rev_timestamp >= :start_date and rev_timestamp <= :end_date
                            and rev_id > :min_rev_id) active_users
                        join user on active_users.rev_user=user.user_id;""")

    # Timestamps are bound as integers.
    bindings = {}
    bindings["start_date"] = int(to_wmftimestamp(start_date))
    bindings["end_date"] = int(to_wmftimestamp(end_date))
    bindings["min_rev_id"] = min_rev_id

    return pd.read_sql(query, con=wmf_con, params=bindings)
def get_timestamps_within_range(lang, user_id, con, start_date, end_date):
    '''Return the timestamps of one user's edits inside a date range.

    The range is half-open: edits with start_date <= rev_timestamp < end_date
    are returned, ordered by rev_timestamp.

    :param lang: wiki language code; the connection is switched to `{lang}wiki_p`.
    :param user_id: id of the user, bound as an int.
    :param con: database connection.
    :param start_date: inclusive lower bound, converted with `to_wmftimestamp`.
    :param end_date: exclusive upper bound, converted with `to_wmftimestamp`.
    :return: DataFrame with a single `rev_timestamp` column whose values have
        been passed through `from_wmftimestamp`.
    '''
    con.execute(f'use {lang}wiki_p;')

    query = sqlalchemy.text(
        '''select rev_timestamp from revision_userindex where rev_user = :user_id
                and rev_timestamp >= :start_date and rev_timestamp < :end_date 
                order by rev_timestamp
                ''')
    bindings = dict(user_id=int(user_id),
                    start_date=to_wmftimestamp(start_date),
                    end_date=to_wmftimestamp(end_date))

    edits = pd.read_sql(query, con=con, params=bindings)
    converted = edits['rev_timestamp'].apply(from_wmftimestamp)
    edits['rev_timestamp'] = converted
    return edits
def get_thanks_thanking_user(lang, user_name, start_date, end_date, wmf_con):
    """
    Return the thanks that `user_name` received in `lang`wiki between
    start_date and end_date (both inclusive).

    Thanks are read from `logging_logindex` rows with log_action 'thank'
    whose log_title equals the user name (spaces replaced by underscores).
    Sender and receiver names are joined against `user` to look up their ids.

    :param lang: wiki language code; the connection is switched to `{lang}wiki_p`.
    :param user_name: name of the thanked user.
    :param start_date: inclusive lower bound, converted with `to_wmftimestamp`.
    :param end_date: inclusive upper bound, converted with `to_wmftimestamp`.
    :param wmf_con: database connection.
    :return: DataFrame with thank_timestamp, sender, receiver, receiver_id and
        sender_id. thank_timestamp is passed through `from_wmftimestamp`;
        sender and receiver through `decode_or_nan`.
    """
    wmf_con.execute(f"use {lang}wiki_p;")
    # The range must be written as two separate comparisons. MySQL evaluates
    # `a <= x <= b` as `(a <= x) <= b`, i.e. it compares a 0/1 flag with b,
    # which is true for any timestamp and silently disables the filter.
    user_thank_sql = """
                    select thank_timestamp, sender, receiver, ru.user_id as receiver_id, su.user_id as sender_id from
                        (select log_timestamp as thank_timestamp, replace(log_title, '_', ' ') as receiver, log_user_text as sender
                        from logging_logindex where log_title = :user_name
                        and log_action = 'thank'
                        and log_timestamp >= :start_date
                        and log_timestamp <= :end_date ) t
                    left join user ru on ru.user_name = t.receiver
                    left join user su on su.user_name = t.sender """
    user_thank_sql_esc = sqlalchemy.text(user_thank_sql)
    sql_params = {
        # log_title stores the name with underscores instead of spaces.
        'user_name': user_name.replace(' ', '_'),
        'start_date': to_wmftimestamp(start_date),
        'end_date': to_wmftimestamp(end_date)
    }
    df = pd.read_sql(user_thank_sql_esc, con=wmf_con, params=sql_params)
    df['thank_timestamp'] = df['thank_timestamp'].apply(from_wmftimestamp)
    df['sender'] = df['sender'].apply(decode_or_nan)
    df['receiver'] = df['receiver'].apply(decode_or_nan)
    return df
def get_users_edit_spans(lang, start_date, end_date, wmf_con):
    """
    Return the first and last edits, between start_date and end_date, of the
    users of `lang`wiki who registered within that same range.

    A missing user_registration is treated as 20010101000000 when deciding
    whether a user falls inside the range. All bounds are inclusive.

    :param lang: wiki language code; the connection is switched to
        `{lang}wiki_p` and the value is also returned in the `lang` column.
    :param start_date: inclusive lower bound, converted with `to_wmftimestamp`.
    :param end_date: inclusive upper bound, converted with `to_wmftimestamp`.
    :param wmf_con: database connection.
    :return: DataFrame with lang, user_id, user_name, user_registration,
        live_edit_count, first_edit and last_edit. The three timestamp columns
        are passed through `from_wmftimestamp`, user_name through
        `decode_or_nan`.
    """
    db_prefix = f'{lang}wiki_p'
    wmf_con.execute(f'use {db_prefix};')
    # Each range is spelled out as two comparisons. MySQL evaluates
    # `a <= x <= b` as `(a <= x) <= b`, which is true for any timestamp, so
    # the chained form returned the first/last edit of the whole history.
    reg_sql = sqlalchemy.text('''select :lang as lang, user_id, user_name, user_registration, user_editcount as live_edit_count,
       (select min(rev_timestamp) from revision_userindex
         where rev_user=user_id and rev_timestamp >= :start_date and rev_timestamp <= :end_date) as first_edit,
       (select max(rev_timestamp) from revision_userindex
         where rev_user=user_id and rev_timestamp >= :start_date and rev_timestamp <= :end_date) as last_edit
from user where coalesce(user_registration, 20010101000000) <= :end_date
     and
                coalesce(user_registration, 20010101000000) >= :start_date;
''')
    # Bound as integers so the comparisons see the same numeric literals the
    # previously interpolated statement contained.
    sql_params = {
        'lang': lang,
        'start_date': int(to_wmftimestamp(start_date)),
        'end_date': int(to_wmftimestamp(end_date)),
    }
    span_df = pd.read_sql(reg_sql, con=wmf_con, params=sql_params)
    span_df['user_registration'] = span_df['user_registration'].apply(
        from_wmftimestamp)
    span_df['first_edit'] = span_df['first_edit'].apply(from_wmftimestamp)
    span_df['last_edit'] = span_df['last_edit'].apply(from_wmftimestamp)
    span_df['user_name'] = span_df['user_name'].apply(decode_or_nan)
    return span_df
def get_recent_edits(lang,
                     user_id,
                     con,
                     prior_days=None,
                     max_revs=None,
                     end_date=None):
    '''Return a user's edits from the `prior_days` days before their last edit.

    The reference point is the user's latest edit at or before `end_date`.
    Edits made less than `prior_days` days before that edit are selected,
    capped at `max_revs` rows, and joined with `page` for the namespace.

    :param lang: wiki language code; the connection is switched to `{lang}wiki_p`.
    :param user_id: id of the user whose edits are fetched.
    :param con: database connection.
    :param prior_days: length of the window in days; a falsy value means 84.
    :param max_revs: maximum number of edits; a falsy value means 50.
    :param end_date: latest edit time to consider; a falsy value means the
        current UTC time.
    :return: DataFrame with user_id, rev_timestamp, rev_id, page_id and
        page_namespace.
    '''
    # Falsy arguments (None, 0, ...) fall back to the defaults.
    end_date = end_date or datetime.datetime.utcnow()
    prior_days = prior_days or 84
    max_revs = max_revs or 50

    con.execute(f'use {lang}wiki_p;')

    # NOTE(review): the inner query applies `limit` without an `order by`, so
    # which rows survive the cap is up to the server -- confirm that the
    # most recent edits are not required here.
    query_template = ''' select user_id, rev_timestamp, rev_id, page_id, page_namespace from
            (select user_id, ts as rev_timestamp, rev_id, rev_page from
            (select a.rev_user as user_id, timestamp(a.rev_timestamp) as ts, a.rev_id as rev_id, timestamp(b.mts) as mts, rev_page
            from
            (select rev_user, rev_timestamp, rev_id, rev_page from revision_userindex where rev_user = {user_id} and rev_timestamp <= {end_date}) a
            join
            (select rev_user, max(rev_timestamp) as mts from revision_userindex where rev_user = {user_id} and rev_timestamp <= {end_date})  b
            on a.rev_user = b.rev_user
            ) uhist
            where ts > date_sub(mts, interval {prior_days} day)
            limit {max_revs}) revs
            join page
            on rev_page = page_id;
            '''
    placeholders = {
        'user_id': user_id,
        'prior_days': prior_days,
        'max_revs': max_revs,
        'end_date': to_wmftimestamp(end_date),
    }
    return pd.read_sql(query_template.format(**placeholders), con)