예제 #1
0
def get_events(
    users: List[str],
    start: date = date(2018, 1, 1),
    end: date = date(2019, 1, 1)
) -> pd.DataFrame:
    """Run ``events.sql`` and post-process the resulting frame.

    Args:
        users: Forwarded to ``sql_to_df`` as the ``users`` keyword.
        start: Forwarded to ``sql_to_df`` as the ``start`` keyword.
        end: Forwarded to ``sql_to_df`` as the ``end`` keyword.

    Returns:
        The query result after ``_rename_events`` and then ``coerce_types``
        (called with ``types=Cols.types()``) have been piped over it.
    """
    raw_events = sql_to_df("events.sql", users=users, start=start, end=end)
    renamed_events = raw_events.pipe(_rename_events)
    return renamed_events.pipe(coerce_types, types=Cols.types())
예제 #2
0
파일: viz.py 프로젝트: giokincade/primary
def products_per_bag(start: date = date(2018, 12, 1), end: date = date(2019, 2, 25)):
    """Draw and show a line plot of ``bag_indicator`` against ``product``.

    The data comes from ``views_per_bag.sql``; ``start`` and ``end`` are
    forwarded to ``sql_to_df`` as keywords of the same name.

    Args:
        start: Forwarded to the query as ``start``.
        end: Forwarded to the query as ``end``.
    """
    init_plt()

    bag_stats = sql_to_df('views_per_bag.sql', start=start, end=end)
    # Cast the indicator column to float in place before plotting it.
    indicator_as_float = bag_stats["bag_indicator"].astype(float)
    bag_stats["bag_indicator"] = indicator_as_float

    sns.lineplot(x="product", y="bag_indicator", data=bag_stats)

    # Title goes on the figure; labels and x-range on the current axes.
    current_figure = plt.gcf()
    current_figure.suptitle("Product Views vs Bags")
    plt.xlabel("Product Views")
    plt.ylabel("Likelihood of Bagging")
    plt.xlim(0, 25)

    plt.show()
예제 #3
0
def _join_first_order_facts(users: pd.DataFrame) -> pd.DataFrame:
    """Left-join the rows of ``first_order_facts.sql`` onto ``users``.

    The facts frame is indexed by ``Cols.EMAIL`` and matched against the
    ``Cols.EMAIL`` column of ``users``. Before joining, two 0.0/1.0 float
    flag columns are derived from ``Cols.FIRST_ORDER_DIVISION``.

    Args:
        users: Frame holding a ``Cols.EMAIL`` column to join on.

    Returns:
        ``users`` with the fact columns attached (left join, so every row of
        ``users`` is kept).
    """
    facts = sql_to_df("first_order_facts.sql").set_index(Cols.EMAIL)

    # Remove every "|unknown" fragment from the division label.
    cleaned_division = facts[Cols.FIRST_ORDER_DIVISION].replace(
        regex=r"\|unknown", value="")
    facts[Cols.FIRST_ORDER_DIVISION] = cleaned_division

    # Flag column -> division labels that switch the flag on.
    flag_divisions = (
        (Cols.FIRST_ORDER_HAS_BABY, ["baby", "baby|kids"]),
        (Cols.FIRST_ORDER_HAS_KIDS, ["kids", "baby|kids"]),
    )
    for flag_column, divisions in flag_divisions:
        is_member = facts[Cols.FIRST_ORDER_DIVISION].isin(divisions)
        # Boolean membership stored as float 0.0 / 1.0.
        facts[flag_column] = is_member.astype(float)

    return users.join(facts, on=Cols.EMAIL, how="left")
예제 #4
0
def _join_mixpanel_stats(users: pd.DataFrame) -> pd.DataFrame:
    """Left-join ``user_visit_stats.sql`` onto ``users`` and derive rates.

    After the join on ``Cols.EMAIL`` the function:

    * floors ``Cols.DAYS_RETAINED`` and ``Cols.DAYS_RETAINED_PER_VISIT``
      at 1.0,
    * derives ``Cols.QUARTERS_RETAINED`` / ``Cols.YEARS_RETAINED`` from the
      floored ``Cols.DAYS_RETAINED`` (never less than 1.0), and
    * divides ``Cols.LIFETIME_ORDERS`` by each of those to produce
      ``Cols.ORDERS_PER_QUARTER`` and ``Cols.ORDERS_PER_YEAR``.

    Args:
        users: Frame holding ``Cols.EMAIL`` and ``Cols.LIFETIME_ORDERS``
            (the latter may instead come from the joined stats; not
            visible from here).

    Returns:
        The joined frame with the adjusted and derived columns.
    """

    def floor_at_one(value):
        # A comparison with NaN is False, so missing values pass through.
        if value < 1:
            return 1.0
        return value

    def periods_retained(days, period_length):
        # Less than one full period counts as exactly one period; a NaN
        # fails the comparison and therefore also yields 1.0.
        if days >= period_length:
            return days / period_length
        return 1.0

    visit_stats = sql_to_df("user_visit_stats.sql").set_index(Cols.EMAIL)
    merged = users.join(visit_stats, on=Cols.EMAIL, how="left")

    # Single-visit users were retained for one day.
    for column in (Cols.DAYS_RETAINED, Cols.DAYS_RETAINED_PER_VISIT):
        merged[column] = merged[column].apply(floor_at_one)

    # (periods column, orders-per-period column, days in one period)
    period_specs = (
        (Cols.QUARTERS_RETAINED, Cols.ORDERS_PER_QUARTER, 90.0),
        (Cols.YEARS_RETAINED, Cols.ORDERS_PER_YEAR, 365.0),
    )
    for periods_column, rate_column, period_length in period_specs:
        merged[periods_column] = merged[Cols.DAYS_RETAINED].apply(
            periods_retained, args=(period_length,))
        merged[rate_column] = (
            merged[Cols.LIFETIME_ORDERS] / merged[periods_column])

    return merged
예제 #5
0
def get_transactions() -> pd.DataFrame:
    """Run ``transactions.sql`` and post-process the resulting frame.

    Returns:
        The query result after ``coerce_types`` (called with
        ``types=Cols.types()``) and then ``_calculate_fields`` have been
        piped over it.
    """
    raw_transactions = sql_to_df("transactions.sql")
    typed_transactions = raw_transactions.pipe(
        coerce_types, types=Cols.types())
    return typed_transactions.pipe(_calculate_fields)