Beispiel #1
0
def create_last_events():
    """Create the ``last_events`` view via ``create_view``.

    The view joins ``first_seen_from_events`` to ``events_all`` on
    ``distinct_id`` and keeps the events whose ``timestamp`` minus
    ``2*60*60`` is below the user's ``first_seen_ts``.
    """
    # NOTE(review): the view is called "last_events" yet it filters against
    # first_seen_ts -- confirm this is intended and not a copy/paste slip.
    query = """SELECT *
        FROM first_seen_from_events f
        JOIN events_all e
        ON f.distinct_id = e.distinct_id
        AND   e."timestamp"  - 2*60*60  <  f.first_seen_ts
        ;

        """
    return create_view("last_events", query)
Beispiel #2
0
def create_age_view():
    """Create the ``age_from_events`` view via ``create_view``.

    For every ``distinct_id`` present in both ``first_seen_from_events`` and
    ``last_seen_from_events`` the view exposes ``age_s`` (last seen minus
    first seen) and ``age_days`` (that difference divided by ``60*60*24``
    and rounded up with ``ceil``).
    """
    query = """
        SELECT
            f.distinct_id,
            last_seen_ts - first_seen_ts  as age_s,
            ceil( (last_seen_ts::numeric - first_seen_ts::numeric)/(60*60*24))
                    as age_days
        FROM first_seen_from_events f
        JOIN last_seen_from_events l
        ON f.distinct_id = l.distinct_id"""
    return create_view("age_from_events", query)
Beispiel #3
0
def create_last_events():
    """Create the ``last_events`` view via ``create_view``.

    The view joins ``first_seen_from_events`` to ``events_all`` on
    ``distinct_id`` and keeps the events whose ``timestamp`` minus
    ``2*60*60`` is below the user's ``first_seen_ts``.
    """
    # NOTE(review): the view is called "last_events" yet it filters against
    # first_seen_ts -- confirm this is intended and not a copy/paste slip.
    query = """SELECT *
        FROM first_seen_from_events f
        JOIN events_all e
        ON f.distinct_id = e.distinct_id
        AND   e."timestamp"  - 2*60*60  <  f.first_seen_ts
        ;

        """
    return create_view("last_events", query)
Beispiel #4
0
def create_age_view():
    """Create the ``age_from_events`` view via ``create_view``.

    For every ``distinct_id`` present in both ``first_seen_from_events`` and
    ``last_seen_from_events`` the view exposes ``age_s`` (last seen minus
    first seen) and ``age_days`` (that difference divided by ``60*60*24``
    and rounded up with ``ceil``).
    """
    query = """
        SELECT
            f.distinct_id,
            last_seen_ts - first_seen_ts  as age_s,
            ceil( (last_seen_ts::numeric - first_seen_ts::numeric)/(60*60*24))
                    as age_days
        FROM first_seen_from_events f
        JOIN last_seen_from_events l
        ON f.distinct_id = l.distinct_id"""
    return create_view("age_from_events", query)
Beispiel #5
0
def create_dates_during_users_life():
    """Create the ``dates_during_users_life`` view via ``create_view``.

    The view pairs each ``distinct_id`` from ``first_last_seen`` with every
    row of ``dates`` whose ``date`` lies between the dates derived from
    ``first_seen_ts`` and ``last_seen_ts`` (both bounds inclusive), and
    returns the date together with its ``week``.
    """
    # The timestamps are turned into dates by adding them, as a number of
    # one-second intervals, to the SQL 'epoch' timestamp.
    query = """
        SELECT distinct_id, dates.date, dates.week
        FROM dates
        JOIN first_last_seen fl
        ON
            dates.date >=
            DATE(TIMESTAMP 'epoch' + fl.first_seen_ts * INTERVAL '1 Second ')
            AND
            dates.date <=
            DATE(TIMESTAMP 'epoch' + fl.last_seen_ts * INTERVAL '1 Second ')
            """
    return create_view("dates_during_users_life", query)
Beispiel #6
0
def create_dates_during_users_life():
    """Create the ``dates_during_users_life`` view via ``create_view``.

    The view pairs each ``distinct_id`` from ``first_last_seen`` with every
    row of ``dates`` whose ``date`` lies between the dates derived from
    ``first_seen_ts`` and ``last_seen_ts`` (both bounds inclusive), and
    returns the date together with its ``week``.
    """
    # The timestamps are turned into dates by adding them, as a number of
    # one-second intervals, to the SQL 'epoch' timestamp.
    query = """
        SELECT distinct_id, dates.date, dates.week
        FROM dates
        JOIN first_last_seen fl
        ON
            dates.date >=
            DATE(TIMESTAMP 'epoch' + fl.first_seen_ts * INTERVAL '1 Second ')
            AND
            dates.date <=
            DATE(TIMESTAMP 'epoch' + fl.last_seen_ts * INTERVAL '1 Second ')
            """
    return create_view("dates_during_users_life", query)
def cut_events_by_time(view_name, condition):
    """Create a view of ``events_all`` rows restricted by a join condition.

    The view selects ``e.distinct_id`` plus every column name returned by
    ``get_all_columns_without_id("events_all")`` (each double-quoted and
    qualified with the ``e`` alias) from ``first_seen_from_events f`` joined
    to ``events_all e`` on ``distinct_id``.

    Args:
        view_name: Name handed through to ``create_view``.
        condition: Raw SQL boolean expression appended to the join's ``ON``
            clause after ``AND``.  It is spliced into the statement
            verbatim, so it must come from trusted code, never user input.

    Returns:
        Whatever ``create_view`` returns.
    """
    main_table = "e"
    columns = get_all_columns_without_id("events_all")

    # join() leaves no trailing separator to trim.  Embedded double quotes
    # are doubled, which is how SQL escapes them in a quoted identifier.
    quoted = [
        main_table + '."' + col.replace('"', '""') + '"' for col in columns
    ]
    col_sql = ", ".join([main_table + ".distinct_id"] + quoted)

    # The blank before FROM is required: without it an empty column list
    # would produce "e.distinct_idFROM", which is not valid SQL.
    return create_view(
        view_name, """SELECT """ + col_sql + """ FROM first_seen_from_events f
        JOIN events_all e
        ON f.distinct_id = e.distinct_id
        AND   """ + condition + """
        ;
        """)
def cut_events_by_time(view_name, condition):
    """Create a view of ``events_all`` rows restricted by a join condition.

    The view selects ``e.distinct_id`` plus every column name returned by
    ``get_all_columns_without_id("events_all")`` (each double-quoted and
    qualified with the ``e`` alias) from ``first_seen_from_events f`` joined
    to ``events_all e`` on ``distinct_id``.

    Args:
        view_name: Name handed through to ``create_view``.
        condition: Raw SQL boolean expression appended to the join's ``ON``
            clause after ``AND``.  It is spliced into the statement
            verbatim, so it must come from trusted code, never user input.

    Returns:
        Whatever ``create_view`` returns.
    """
    main_table = "e"
    columns = get_all_columns_without_id("events_all")

    # join() leaves no trailing separator to trim.  Embedded double quotes
    # are doubled, which is how SQL escapes them in a quoted identifier.
    quoted = [
        main_table + '."' + col.replace('"', '""') + '"' for col in columns
    ]
    col_sql = ", ".join([main_table + ".distinct_id"] + quoted)

    # The blank before FROM is required: without it an empty column list
    # would produce "e.distinct_idFROM", which is not valid SQL.
    return create_view(view_name,
                       """SELECT """ + col_sql + """ FROM first_seen_from_events f
        JOIN events_all e
        ON f.distinct_id = e.distinct_id
        AND   """ + condition + """
        ;
        """)
Beispiel #9
0
def create_first_events():
    '''
    Create the "first_events" view via create_view.

    The view lists, per distinct_id, the event_type, "new user" flag and
    result of the events whose timestamp equals the user's first_seen_ts.
    According to the original author it is needed to tell clean sessions
    apart from sessions orphaned by a bug.

    Note: a user gets several rows when more than one event shares that
    first timestamp.
    '''
    query = """SELECT f.distinct_id, event_type as first_event_type,
        "new user" as first_event_new_user,result as first_event_result
        FROM first_seen_from_events f
        JOIN events_all e
        ON f.distinct_id = e.distinct_id
        AND f.first_seen_ts = e."timestamp"
        ;

        """
    return create_view("first_events", query)
Beispiel #10
0
def create_first_events():
    '''
    Create the "first_events" view via create_view.

    The view lists, per distinct_id, the event_type, "new user" flag and
    result of the events whose timestamp equals the user's first_seen_ts.
    According to the original author it is needed to tell clean sessions
    apart from sessions orphaned by a bug.

    Note: a user gets several rows when more than one event shares that
    first timestamp.
    '''
    query = """SELECT f.distinct_id, event_type as first_event_type,
        "new user" as first_event_new_user,result as first_event_result
        FROM first_seen_from_events f
        JOIN events_all e
        ON f.distinct_id = e.distinct_id
        AND f.first_seen_ts = e."timestamp"
        ;

        """
    return create_view("first_events", query)
Beispiel #11
0
def create_users():
    """Create the ``users`` view via ``create_view``.

    The view holds the distinct combinations of ``distinct_id``,
    ``mp_country_code``, ``$region``, ``$city`` and ``$email`` found in
    ``events_all``.
    """
    query = """SELECT DISTINCT distinct_id, mp_country_code, "$region", "$city", "$email"
    FROM events_all"""
    return create_view("users", query)
Beispiel #12
0
def make_events_lower_case():
    '''
    Create the "events_all" view on top of raw_copy via create_view.

    Every listed column is passed through unchanged except event_type,
    which is lower-cased.  Rows are limited to those whose "time" lies
    after '2015-01-15' and before '2015-02-28 23:59:59'.

    Original author's rationale: Redshift forces column and table names
    to lower case, and a table/column per event type is planned, so the
    event types are lower-cased here at import time to keep later lookups
    case-insensitive.
    '''
    query = """
        SELECT
            "Scene Card Title",
            domain,
            "channel type",
            "$distinct_id",
            "User Id",
            type,
            "Upper History Navigation",
            "Author Name",
            "Bookmark navigation",
            "Operating System",
            "Read Time",
            "Gallery Object Type",
            "$radio",
            os_version,
            "$app_release",
            "Screen Width",
            "$device",
            "New User",
            "Upper Browser",
            "$ip",
            "User Continued Authentication",
            mp_device_model,
            "Scene Title",
            "$manufacturer",
            "$os_version",
            screen_width,
            "URL",
            "timestamp",
            "$lib_version",
            "Upper Channel Type",
            "$carrier",
            distinct_id,
            "Topic ID",
            "Account Created",
            "$screen_width",
            e,
            recipient,
            "AccountCreated",
            mp_country_code,
            "Story URL",
            "Default Topic Title",
            "Curator Name",
            mp_lib,
            "Menu Item",
            "Bookmark Title",
            n,
            "Bookmark ID",
            t,
            referrer,
            "$app_version",
            "$browser",
            "History navigation",
            "Author2 Name",
            browser,
            "Number of Bookmarks",
            delivery_id,
            "Default Topic ID",
            "$initial_referring_domain",
            "Trial to Signup",
            campaign_id,
            "External Link",
            "Scene ID",
            "Source",
            "$city",
            "Bookmark Navigation Clicked",
            "Curator ID", "OS Version",
            "Story Read Time", category,
            "$referrer",
            "Author ID",
            "Screen Height",
            "Drop Down Follow Through",
            "$os",
            "Accepted",
            message_id,
            "Time Since Story Opened",
            "User Created",
            "$wifi",
            "Scene Card ID",
            "$ios_ifa",
            "$email",
            "$initial_referrer",
            "Story ID",
            "$referring_domain",
            "Story Title",
            "$screen_height",
            a,
            "Topic Title",
            "Scene Card Number",
            "$model",
            "$region",
            "Share Source",
            message_type,
            "Story Completion",
            "Scene Cards",
            "First Launch",
            "Default Topic",
            "Result",
            "time",
            "Message",
            "Navigation Clicked",
            os,
            "Author2 ID",
            screen_height,
            v,
            id,
            LOWER(event_type) AS event_type
        FROM raw_copy
        WHERE "time" > '2015-01-15'
        AND time < '2015-02-28 23:59:59'
    """
    return create_view("events_all", query)
Beispiel #13
0
def create_clean_users():
    '''
    Create the "clean_users" view via create_view.

    The view keeps rows of users_all_features for users whose first and
    last event types (from users_first_last_events) both belong to a
    fixed list of viewing/navigation events, and whose onboarding,
    registration, login, bookmark and profile counters are all zero.

    Original author's notes: only sessions of users who have not signed
    up, not logged in and not used any feature that forces a profile
    creation are analysed.  This throws away only 15k signed-up users
    out of 300k; only 2k of them logged in a second time on the web, and
    the same 5% proportion holds among users with age > 10 days, so
    registration is assumed not to affect whether someone is a power
    user.  See cleaning_target.md.
    '''
    query = """
    SELECT u.* FROM users_all_features u
    JOIN
        (
        SELECT DISTINCT distinct_id FROM users_first_last_events WHERE
        first_event_type IN
        (
        'app viewed',
        'app navigation clicked',
        'story navigation clicked',
        'story viewed',
        'topic viewed',
        'explore topic clicked',
        'explore viewed',
        'story completion'
        )
        AND last_event_type IN
        (
        'app viewed',
        'app navigation clicked',
        'story navigation clicked',
        'story viewed',
        'topic viewed',
        'explore topic clicked',
        'explore viewed',
        'story completion'
        ) ) c
ON  c.distinct_id = u.distinct_id
WHERE
     onboard_viewed_total =0
        AND onboard_navigation_total = 0
        AND email_register_total = 0
        AND login_result_total = 0
        AND Bookmarks_Viewed_total = 0
        AND  bookmark_navigation_total =0
        AND profile_viewed_total = 0
        AND profile_page_viewed_total = 0
        AND profile_navigation_clicked_total = 0
        AND login_page_viewed_total = 0
        AND login_viewed_total = 0
        AND logging_in_user_id_total = 0
        AND bookmark_viewed_total = 0
        AND bookmark_story_total = 0
        AND bookmark_remove_total = 0
        AND forgot_password_page_viewed_total = 0
        AND bookmark_navigation_total = 0
        ;"""
    return create_view("clean_users", query)
Beispiel #14
0
def create_users_first_last_events():
    """Create the ``users_first_last_events`` view via ``create_view``.

    The view's SQL is whatever ``aggregate_all_features`` builds from the
    tables returned by ``get_all_first_last_event_tables``.
    """
    return create_view(
        "users_first_last_events",
        aggregate_all_features(get_all_first_last_event_tables()),
    )
Beispiel #15
0
def create_users_first_last_events():
    """Create the ``users_first_last_events`` view via ``create_view``.

    The view's SQL is whatever ``aggregate_all_features`` builds from the
    tables returned by ``get_all_first_last_event_tables``.
    """
    return create_view(
        "users_first_last_events",
        aggregate_all_features(get_all_first_last_event_tables()),
    )
Beispiel #16
0
def create_clean_users():
    '''
    Create the "clean_users" view via create_view.

    The view keeps rows of users_all_features for users whose first and
    last event types (from users_first_last_events) both belong to a
    fixed list of viewing/navigation events, and whose onboarding,
    registration, login, bookmark and profile counters are all zero.

    Original author's notes: only sessions of users who have not signed
    up, not logged in and not used any feature that forces a profile
    creation are analysed.  This throws away only 15k signed-up users
    out of 300k; only 2k of them logged in a second time on the web, and
    the same 5% proportion holds among users with age > 10 days, so
    registration is assumed not to affect whether someone is a power
    user.  See cleaning_target.md.
    '''
    query = """
    SELECT u.* FROM users_all_features u
    JOIN
        (
        SELECT DISTINCT distinct_id FROM users_first_last_events WHERE
        first_event_type IN
        (
        'app viewed',
        'app navigation clicked',
        'story navigation clicked',
        'story viewed',
        'topic viewed',
        'explore topic clicked',
        'explore viewed',
        'story completion'
        )
        AND last_event_type IN
        (
        'app viewed',
        'app navigation clicked',
        'story navigation clicked',
        'story viewed',
        'topic viewed',
        'explore topic clicked',
        'explore viewed',
        'story completion'
        ) ) c
ON  c.distinct_id = u.distinct_id
WHERE
     onboard_viewed_total =0
        AND onboard_navigation_total = 0
        AND email_register_total = 0
        AND login_result_total = 0
        AND Bookmarks_Viewed_total = 0
        AND  bookmark_navigation_total =0
        AND profile_viewed_total = 0
        AND profile_page_viewed_total = 0
        AND profile_navigation_clicked_total = 0
        AND login_page_viewed_total = 0
        AND login_viewed_total = 0
        AND logging_in_user_id_total = 0
        AND bookmark_viewed_total = 0
        AND bookmark_story_total = 0
        AND bookmark_remove_total = 0
        AND forgot_password_page_viewed_total = 0
        AND bookmark_navigation_total = 0
        ;"""
    return create_view("clean_users", query)
Beispiel #17
0
def make_events_lower_case():
    '''
    Create the "events_all" view on top of raw_copy via create_view.

    Every listed column is passed through unchanged except event_type,
    which is lower-cased.  Rows are limited to those whose "time" lies
    after '2015-01-15' and before '2015-02-28 23:59:59'.

    Original author's rationale: Redshift forces column and table names
    to lower case, and a table/column per event type is planned, so the
    event types are lower-cased here at import time to keep later lookups
    case-insensitive.
    '''
    query = """
        SELECT
            "Scene Card Title",
            domain,
            "channel type",
            "$distinct_id",
            "User Id",
            type,
            "Upper History Navigation",
            "Author Name",
            "Bookmark navigation",
            "Operating System",
            "Read Time",
            "Gallery Object Type",
            "$radio",
            os_version,
            "$app_release",
            "Screen Width",
            "$device",
            "New User",
            "Upper Browser",
            "$ip",
            "User Continued Authentication",
            mp_device_model,
            "Scene Title",
            "$manufacturer",
            "$os_version",
            screen_width,
            "URL",
            "timestamp",
            "$lib_version",
            "Upper Channel Type",
            "$carrier",
            distinct_id,
            "Topic ID",
            "Account Created",
            "$screen_width",
            e,
            recipient,
            "AccountCreated",
            mp_country_code,
            "Story URL",
            "Default Topic Title",
            "Curator Name",
            mp_lib,
            "Menu Item",
            "Bookmark Title",
            n,
            "Bookmark ID",
            t,
            referrer,
            "$app_version",
            "$browser",
            "History navigation",
            "Author2 Name",
            browser,
            "Number of Bookmarks",
            delivery_id,
            "Default Topic ID",
            "$initial_referring_domain",
            "Trial to Signup",
            campaign_id,
            "External Link",
            "Scene ID",
            "Source",
            "$city",
            "Bookmark Navigation Clicked",
            "Curator ID", "OS Version",
            "Story Read Time", category,
            "$referrer",
            "Author ID",
            "Screen Height",
            "Drop Down Follow Through",
            "$os",
            "Accepted",
            message_id,
            "Time Since Story Opened",
            "User Created",
            "$wifi",
            "Scene Card ID",
            "$ios_ifa",
            "$email",
            "$initial_referrer",
            "Story ID",
            "$referring_domain",
            "Story Title",
            "$screen_height",
            a,
            "Topic Title",
            "Scene Card Number",
            "$model",
            "$region",
            "Share Source",
            message_type,
            "Story Completion",
            "Scene Cards",
            "First Launch",
            "Default Topic",
            "Result",
            "time",
            "Message",
            "Navigation Clicked",
            os,
            "Author2 ID",
            screen_height,
            v,
            id,
            LOWER(event_type) AS event_type
        FROM raw_copy
        WHERE "time" > '2015-01-15'
        AND time < '2015-02-28 23:59:59'
    """
    return create_view("events_all", query)
Beispiel #18
0
def createView():
    """Return, as JSON, True when ``create_view`` succeeded and False otherwise.

    ``create_view`` is called with the session's ``user_id`` and the
    ``viewname`` query argument; its result is reduced to a boolean.
    """
    created = create_view(session["user_id"], request.args.get("viewname"))
    return jsonify(bool(created))
Beispiel #19
0
def create_users():
    """Create the ``users`` view via ``create_view``.

    The view holds the distinct combinations of ``distinct_id``,
    ``mp_country_code``, ``$region``, ``$city`` and ``$email`` found in
    ``events_all``.
    """
    query = """SELECT DISTINCT distinct_id, mp_country_code, "$region", "$city", "$email"
    FROM events_all"""
    return create_view("users", query)