def most_recent_actions():
    """
    Returns the most recent events for a user, primarily used in featurizing.

    Users are joined to their events (the Event entity is added to the
    result), grouped by user id and ordered by event date, newest first.
    The outcome of ``query_to_df`` is returned unchanged.
    """
    users_with_events = session.query(Users).join(Event).add_entity(Event)
    newest_first = users_with_events.group_by(Users.id).order_by(Event.date.desc())
    # NOTE(review): unlike the visits variant, the query is turned into a
    # subquery before being handed to query_to_df -- confirm this is intended.
    return query_to_df(session, newest_first.subquery())
def most_recent_visits():
    """
    Returns the most recent visits for a user, primarily used in featurizing.

    Users are joined to their visits (the Visit entity is added to the
    result), grouped by user id and ordered by visit date, newest first.
    The outcome of ``query_to_df`` is returned unchanged.
    """
    users_with_visits = session.query(Users).join(Visit).add_entity(Visit)
    one_row_per_user = users_with_visits.group_by(Users.id)
    newest_first = one_row_per_user.order_by(Visit.date.desc())
    return query_to_df(session, newest_first)
def user_visited_in_last_k_days(threshold):
    """
    Label users of the 'TW' campaign as churned or not based on visit recency.

    The cutoff is ``threshold`` days before the current UTC time. The query
    itself is NOT restricted by date: it selects user id, visit date and
    campaign id for 'TW' users joined to their visits, grouped by user id and
    ordered by visit date descending. Recency is only applied afterwards,
    through the ``churned`` column.

    :param threshold: size of the recency window, in days.
    :return: the frame produced by ``query_to_df`` with the
        ``Users_Campaign_ID`` column dropped, a boolean ``churned`` column
        added (True when ``visit_date`` is earlier than the cutoff) and the
        index reset.
    """
    # Anything visited before this moment counts as churned.
    cutoff = datetime.utcnow() - timedelta(days=threshold)

    most_recent_user_visits = (
        session.query(Users.id, Visit.date, Users.Campaign_ID)
        .join(Visit, Users.id == Visit.user_id)
        .order_by(Visit.date.desc())
        .group_by(Users.id)
        .filter(Users.Campaign_ID == 'TW')
    )

    df = query_to_df(session, most_recent_user_visits)
    # Every remaining row belongs to the 'TW' campaign, so the column is dropped.
    df = df.drop('Users_Campaign_ID', axis=1)
    df['churned'] = df['visit_date'].apply(lambda visited: visited < cutoff)
    return df.reset_index()
from pandas import DataFrame
from util import query_to_df
from util import campaign_to_num, event_to_num, transform_column, hist_and_show, vectorize

db = create_engine('sqlite:///forjar.db')
metadata = MetaData(db)
Session = sessionmaker(bind=db)
session = Session()

# Counts the users by campaign id: load every user, map the campaign ids
# through campaign_to_num and show a histogram of them.
user_dist = session.query(Users)
user_df = query_to_df(session, user_dist)
transform_column(user_df, 'Users_Campaign_ID', campaign_to_num.get)
hist_and_show(user_df, 'Users_Campaign_ID')

# 'bought' events together with the user that triggered them.
# The explicit Users.id == Event.User_Id condition matters: without it the
# query selects from both tables with no join, pairing every 'bought' event
# with every user, so the histogram below would just repeat the user
# distribution above.
q = (
    session.query(Users.Campaign_ID, Event.Type, Users.id, Event.User_Id)
    .filter(Users.id == Event.User_Id)
    .filter(Event.Type == 'bought')
)
d = query_to_df(session, q)
print(d.columns)
transform_column(d, 'Users_Campaign_ID', campaign_to_num.get)

# Map the event types through event_to_num, then show the histogram of
# campaign ids for the 'bought' rows.
transform_column(d, 'Event_Type', event_to_num.get)
hist_and_show(d, 'Users_Campaign_ID')
def add_to_logins(group):
    """
    Aggregation callback applied to each group of the grouped frame.

    Converts the group's values with ``pd.to_datetime``, prints them as a
    matrix and returns ``rolling_mean`` over that matrix with window '30d'.
    """
    group = group.apply(pd.to_datetime)
    values = group.as_matrix()
    print(values)
    return rolling_mean(values, window='30d')


db = create_engine('sqlite:///forjar.db')
metadata = MetaData(db)
Session = sessionmaker(bind=db)
session = Session()

# Every user joined to their visits, with the Visit entity added to the result.
user_dist = session.query(Users).join(Visit, Users.id == Visit.user_id).add_entity(Visit)
user_df = query_to_df(session, user_dist)
print(user_df.columns)

# Group by (user id, visit date) and aggregate each group with add_to_logins.
user_df = user_df.groupby(['Users_id', 'visit_date'])
user_logins = {}
user_df = user_df.aggregate(add_to_logins)
print(user_df)
metadata = MetaData(db) Session = sessionmaker(bind=db) session = Session() """ We only want events and users such that the user bought an item. We count bought as $1 of revenue for simplicity. """ q = session.query(Users.Campaign_ID, Event.Type).filter(Event.Type == 'bought') """ Print out the counts by name. This is a way of showing how to aggregate by campaign ids. """ df = query_to_df(session, q) """ Basic statistics collecting; calculate the sum for each row. """ sum = 0 for campaign_id in campaign_to_num.keys(): rows = num_rows(df.groupby('Users_Campaign_ID').get_group(campaign_id)) sum = sum + rows print 'id was ' + campaign_id + ' ' + str(rows) """ Print the percentage of buy actions for each campaign id """ for campaign_id in campaign_to_num.keys(): rows = num_rows(df.groupby('Users_Campaign_ID').get_group(campaign_id)) rows = rows / sum print 'id was ' + campaign_id + ' and percent of customers bought was ' + str(
Session = sessionmaker(bind=db)
session = Session()

# We only want events and users such that the user bought an item.
# We count bought as $1 of revenue for simplicity.
# The Users.id == Event.User_Id condition ties each event to its own user;
# without it the two tables are selected with no join and every 'bought'
# event is paired with every user.
# NOTE(review): Event.User_Id is assumed to be the column linking an event to
# its user -- confirm against the model definitions.
q = (
    session.query(Users.Campaign_ID, Event.Type)
    .filter(Users.id == Event.User_Id)
    .filter(Event.Type == 'bought')
)

# Aggregate by campaign id. The grouping does not depend on the loop
# variable, so it is built once and reused by both loops.
df = query_to_df(session, q)
by_campaign = df.groupby('Users_Campaign_ID')

# Basic statistics collecting: count the rows of each campaign and
# accumulate the overall total.
sum = 0
for campaign_id in campaign_to_num.keys():
    # A campaign without any 'bought' row has no group; count it as 0
    # instead of letting get_group raise KeyError.
    if campaign_id in by_campaign.groups:
        rows = num_rows(by_campaign.get_group(campaign_id))
    else:
        rows = 0
    sum = sum + rows
    print('id was ' + campaign_id + ' ' + str(rows))

# Share of buy actions for each campaign id.
for campaign_id in campaign_to_num.keys():
    if campaign_id in by_campaign.groups:
        rows = num_rows(by_campaign.get_group(campaign_id))
    else:
        rows = 0
    # float() forces true division: under Python 2, int / int floors and the
    # share would come out as 0 for every campaign that is not the only buyer.
    # With no 'bought' rows at all the share is reported as 0.0.
    rows = rows / float(sum) if sum else 0.0
def most_recent_visits():
    """
    Returns the most recent visits for a user, primarily used in featurizing.

    Builds one query -- users joined to visits, Visit entity added, grouped
    by user id, ordered by visit date descending -- and returns what
    ``query_to_df`` produces for it.
    """
    visits_newest_first = (
        session.query(Users)
        .join(Visit)
        .add_entity(Visit)
        .group_by(Users.id)
        .order_by(Visit.date.desc())
    )
    return query_to_df(session, visits_newest_first)