Beispiel #1
0
def most_recent_actions():
    """
    Load users joined with their events, one row per user, for featurizing.

    The query joins Users to Event, adds the Event entity to the result,
    groups by Users.id and orders by Event.date descending. It is turned into
    a subquery and handed to query_to_df together with the session; whatever
    query_to_df returns is returned unchanged.
    """
    users_with_events = session.query(Users).join(Event).add_entity(Event)
    one_row_per_user = users_with_events.group_by(Users.id)
    newest_first = one_row_per_user.order_by(Event.date.desc())

    # NOTE(review): ORDER BY is applied after GROUP BY, so the row kept for
    # each user is not guaranteed to be that user's latest event -- confirm.
    # NOTE(review): most_recent_visits passes the Query itself, while this
    # passes a subquery -- confirm query_to_df accepts both.
    return query_to_df(session, newest_first.subquery())
Beispiel #2
0
def most_recent_visits():
    """
    Load users joined with their visits, one row per user, for featurizing.

    The query joins Users to Visit, adds the Visit entity to the result,
    groups by Users.id and orders by Visit.date descending. The query is
    handed to query_to_df together with the session; whatever query_to_df
    returns is returned unchanged.
    """
    users_with_visits = session.query(Users).join(Visit).add_entity(Visit)
    one_row_per_user = users_with_visits.group_by(Users.id)

    # NOTE(review): ORDER BY is applied after GROUP BY, so the row kept for
    # each user is not guaranteed to be that user's latest visit -- confirm.
    newest_first = one_row_per_user.order_by(Visit.date.desc())
    return query_to_df(session, newest_first)
def most_recent_actions():
    """
    Load users joined with their events, one row per user, for featurizing.

    Builds a query that joins Users to Event, adds the Event entity, groups
    by Users.id and orders by Event.date descending, converts it to a
    subquery and passes it with the session to query_to_df. The result of
    query_to_df is returned as-is.
    """
    joined = session.query(Users).join(Event).add_entity(Event)
    grouped = joined.group_by(Users.id)
    ordered = grouped.order_by(Event.date.desc())

    # NOTE(review): grouping happens before ordering in SQL, so this may not
    # actually select each user's most recent event -- confirm.
    # NOTE(review): a subquery (not a Query) is passed here, unlike
    # most_recent_visits -- confirm query_to_df supports it.
    events = ordered.subquery()
    return query_to_df(session, events)
Beispiel #4
0
def user_visited_in_last_k_days(threshold):
    """
    Flag users of campaign 'TW' whose visit is older than `threshold` days.

    One row per user is queried (Users.id, Visit.date, Users.Campaign_ID),
    restricted to users whose Campaign_ID equals 'TW'. The campaign column is
    dropped from the resulting frame and a boolean 'churned' column is added.

    threshold -- number of days, passed to timedelta(days=...); a visit dated
                 before utcnow() minus this many days counts as churned.

    Returns the frame produced by query_to_df with 'Users_Campaign_ID'
    removed, 'churned' added and the index reset.
    """
    # Visits strictly earlier than this moment mark the user as churned.
    cutoff = datetime.utcnow() - timedelta(days=threshold)

    # NOTE(review): ORDER BY is applied after GROUP BY, so the Visit.date kept
    # per user is not guaranteed to be the latest one -- confirm.
    most_recent_user_visits = (
        session.query(Users.id, Visit.date, Users.Campaign_ID)
        .join(Visit, Users.id == Visit.user_id)
        .filter(Users.Campaign_ID == 'TW')
        .group_by(Users.id)
        .order_by(Visit.date.desc())
    )

    df = query_to_df(session, most_recent_user_visits)
    # Every remaining row has the same campaign, so the column carries no
    # information.
    df = df.drop('Users_Campaign_ID', axis=1)
    df['churned'] = df['visit_date'].apply(lambda visited: visited < cutoff)
    return df.reset_index()
def user_visited_in_last_k_days(threshold):
    """
    Flag users of campaign 'TW' whose visit is older than `threshold` days.

    Queries one row per user (Users.id, Visit.date, Users.Campaign_ID) for
    users whose Campaign_ID equals 'TW', drops the campaign column and adds a
    boolean 'churned' column.

    threshold -- number of days, passed to timedelta(days=...); a visit dated
                 before utcnow() minus this many days counts as churned.

    Returns the frame produced by query_to_df with 'Users_Campaign_ID'
    removed, 'churned' added and the index reset.
    """
    # Anything visited strictly before this moment is treated as churned.
    cutoff = datetime.utcnow() - timedelta(days=threshold)

    # NOTE(review): ORDER BY is applied after GROUP BY, so the Visit.date kept
    # per user is not guaranteed to be the latest one -- confirm.
    most_recent_user_visits = (
        session.query(Users.id, Visit.date, Users.Campaign_ID)
        .join(Visit, Users.id == Visit.user_id)
        .filter(Users.Campaign_ID == 'TW')
        .group_by(Users.id)
        .order_by(Visit.date.desc())
    )

    df = query_to_df(session, most_recent_user_visits)
    # The campaign is constant after the filter above, so drop it.
    df = df.drop('Users_Campaign_ID', axis=1)
    df['churned'] = df['visit_date'].apply(lambda visited: visited < cutoff)
    return df.reset_index()
Beispiel #6
0
from pandas import DataFrame
from util import query_to_df
from util import campaign_to_num, event_to_num, transform_column, hist_and_show, vectorize

db = create_engine('sqlite:///forjar.db')

metadata = MetaData(db)

Session = sessionmaker(bind=db)

session = Session()

# Distribution of users over campaign ids.
# transform_column is called for its effect on the frame (its return value is
# not used); presumably it maps the column in place -- TODO confirm.
user_dist = session.query(Users)
user_df = query_to_df(session, user_dist)
transform_column(user_df, 'Users_Campaign_ID', campaign_to_num.get)

hist_and_show(user_df, 'Users_Campaign_ID')

# 'bought' events together with the campaign of the user who triggered them.
# The explicit join matters: selecting from Users and Event without a join
# condition yields a cartesian product, pairing every user with every
# purchase and making the per-campaign histogram mirror the user counts.
q = (session.query(Users.Campaign_ID, Event.Type, Users.id, Event.User_Id)
     .join(Event, Users.id == Event.User_Id)
     .filter(Event.Type == 'bought'))
d = query_to_df(session, q)
print(d.columns)

# Encode both categorical columns, then show purchases per campaign id.
transform_column(d, 'Users_Campaign_ID', campaign_to_num.get)
transform_column(d, 'Event_Type', event_to_num.get)
hist_and_show(d, 'Users_Campaign_ID')
db = create_engine('sqlite:///forjar.db')
metadata = MetaData(db)
Session = sessionmaker(bind=db)
session = Session()

# Load every user together with each of their visits (joined on
# Users.id == Visit.user_id), then group the rows by user id and visit date.
user_dist = (
    session.query(Users)
    .join(Visit, Users.id == Visit.user_id)
    .add_entity(Visit)
)
user_df = query_to_df(session, user_dist)
print(user_df.columns)
user_df = user_df.groupby(['Users_id', 'visit_date'])
# NOTE(review): not referenced again in the visible code.
user_logins = {}

def add_to_logins(group):
    """
    Convert a group's values to datetimes and return their rolling mean.

    pd.to_datetime is applied across `group`, the converted values are
    printed as a matrix, and rolling_mean is called on that matrix with
    window='30d'.
    """
    as_dates = group.apply(pd.to_datetime)
    print(as_dates.as_matrix())
    # NOTE(review): rolling_mean and as_matrix are legacy pandas APIs, and a
    # string window on a plain array may not be accepted -- confirm.
    return rolling_mean(as_dates.as_matrix(), window='30d')
# Apply add_to_logins to the grouped frame and show the aggregated result.
user_df = user_df.aggregate(add_to_logins)
print(user_df)



metadata = MetaData(db)

Session = sessionmaker(bind=db)

session = Session()

# We only want events and users such that the user bought an item.
# We count bought as $1 of revenue for simplicity.
# The join ties each event to its own user; without a join condition the
# query is a cartesian product of users and 'bought' events, which inflates
# every campaign's count.
q = (session.query(Users.Campaign_ID, Event.Type)
     .join(Event, Users.id == Event.User_Id)
     .filter(Event.Type == 'bought'))

df = query_to_df(session, q)

# Group once instead of regrouping the frame for every campaign id.
bought_by_campaign = df.groupby('Users_Campaign_ID')

# Print the purchase count per campaign and accumulate the overall total.
# The name `sum` (which shadows the builtin) is kept because the code that
# follows this section divides by it.
sum = 0
for campaign_id in campaign_to_num.keys():
    if campaign_id in bought_by_campaign.groups:
        rows = num_rows(bought_by_campaign.get_group(campaign_id))
    else:
        # get_group raises KeyError for a campaign with no purchases.
        rows = 0
    sum = sum + rows
    print('id was ' + campaign_id + ' ' + str(rows))
"""
Print the percentage of buy actions for each campaign id
"""
for campaign_id in campaign_to_num.keys():
    rows = num_rows(df.groupby('Users_Campaign_ID').get_group(campaign_id))
    rows = rows / sum
    print 'id was ' + campaign_id + ' and percent of customers bought was ' + str(
Beispiel #9
0
Session = sessionmaker(bind=db)


session = Session()

# We only want events and users such that the user bought an item.
# We count bought as $1 of revenue for simplicity.
# The join ties each event to its own user; without a join condition the
# query is a cartesian product of users and 'bought' events, which inflates
# every campaign's count.
q = (session.query(Users.Campaign_ID, Event.Type)
     .join(Event, Users.id == Event.User_Id)
     .filter(Event.Type == 'bought'))

df = query_to_df(session, q)

# Group once and count purchases per campaign id. A campaign with no
# purchases has no group (get_group would raise KeyError), so it counts as 0.
bought_by_campaign = df.groupby('Users_Campaign_ID')
bought_counts = {}
for campaign_id in campaign_to_num.keys():
    if campaign_id in bought_by_campaign.groups:
        bought_counts[campaign_id] = num_rows(
            bought_by_campaign.get_group(campaign_id))
    else:
        bought_counts[campaign_id] = 0

# Basic statistics collecting: print each count and accumulate the total.
total_bought = 0
for campaign_id in campaign_to_num.keys():
    rows = bought_counts[campaign_id]
    total_bought = total_bought + rows
    print('id was ' + campaign_id + ' ' + str(rows))

# Print the share of buy actions for each campaign id.
# float() forces true division; under Python 2, int / int truncates, which
# would make every share 0.
for campaign_id in campaign_to_num.keys():
    if total_bought:
        share = float(bought_counts[campaign_id]) / total_bought
    else:
        share = 0.0
    print('id was ' + campaign_id
          + ' and percent of customers bought was ' + str(share))
Beispiel #10
0
def most_recent_visits():
    """
    Load users joined with their visits, one row per user, for featurizing.

    Builds a query that joins Users to Visit, adds the Visit entity, groups
    by Users.id and orders by Visit.date descending, then passes it with the
    session to query_to_df. The result of query_to_df is returned as-is.
    """
    joined = session.query(Users).join(Visit).add_entity(Visit)
    grouped = joined.group_by(Users.id)

    # NOTE(review): grouping happens before ordering in SQL, so this may not
    # actually select each user's most recent visit -- confirm.
    visits = grouped.order_by(Visit.date.desc())
    return query_to_df(session, visits)