Exemple #1
0
    visit_tr2, coupon_tr, "small_area")
# Add the "genre" and "period" columns through the coupon lookup helper.
visit_tr2["genre"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "genre"
)
visit_tr2["period"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "period"
)

# Pre-group the frames to reduce calculation load.
# NOTE(review): the meaning of the trailing boolean is defined by
# Grouping.to_group, which is not visible here -- confirm before changing.
visit = Grouping.to_group(
    visit_tr2, ["USER_ID", "genre", "small_area", "period"], True
)
candidate = Grouping.to_group(
    coupon_tr, ["genre", "small_area", "period"], True
)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# Numerator: Grouping.to_group_count applied to a copy of the grouped visits.
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genre", "small_area", "period"]
)

# Denominator: `candidate` merged with `active` on the shared "period"
# column, then passed through the same counting helper.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genre", "small_area", "period"],
)

# Keep rows whose genre is in Utility.spot_genre and whose small_area is
# not listed in Utility.prefs (i.e. not the same name as a prefecture).
numer_mask = (
    numer["genre"].isin(Utility.spot_genre)
    & ~numer["small_area"].isin(Utility.prefs)
)
numer = numer[numer_mask].rename(columns={"small_area": "sarea"})

denom_mask = (
    denom["genre"].isin(Utility.spot_genre)
    & ~denom["small_area"].isin(Utility.prefs)
)
denom = denom[denom_mask]
# Remove duplicates: group by (USER_ID, COUPON_ID), take the group sizes,
# then discard the size column (labelled 0 after reset_index) so only one
# row per pair remains.
visit_tr2 = (
    visit_tr.groupby(["USER_ID", "COUPON_ID"])
    .size()
    .reset_index()
    .drop(0, axis=1)
)

# Add the "genreprice" and "period" columns through the coupon lookup helper.
visit_tr2["genreprice"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "genreprice"
)
visit_tr2["period"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "period"
)

# Pre-group the frames to reduce calculation load.
# NOTE(review): the meaning of the trailing boolean is defined by
# Grouping.to_group, which is not visible here -- confirm before changing.
visit = Grouping.to_group(
    visit_tr2, ["USER_ID", "genreprice", "period"], True
)
candidate = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# Numerator: Grouping.to_group_count applied to a copy of the grouped visits.
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genreprice", "period"]
)

# Denominator: `candidate` merged with `active` on the shared "period"
# column, then passed through the same counting helper.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genreprice", "period"],
)

# probability dataframe ------------------------------------

# Build the two pivoted frames from the numerator/denominator pair, keyed
# on "genreprice" (one via to_pivotdf, one via to_pivotdf_period).
visit_pivot_genreprice = Grouping.to_pivotdf(
    numer, denom, "genreprice"
)
visit_pivot_genreprice_period = Grouping.to_pivotdf_period(
    numer, denom, "genreprice"
)

# Prefix every column label with "v_", renaming in place.
# The bound method "v_{}".format is called once per column label.
visit_pivot_genreprice.rename(columns="v_{}".format, inplace=True)
visit_pivot_genreprice_period.rename(columns="v_{}".format, inplace=True)
# Remove duplicates: group by (USER_ID, COUPON_ID), take the group sizes,
# then discard the size column (labelled 0 after reset_index) so only one
# row per pair remains.
visit_tr2 = (
    visit_tr.groupby(["USER_ID", "COUPON_ID"])
    .size()
    .reset_index()
    .drop(0, axis=1)
)

# Add the "small_area", "genre" and "period" columns through the coupon
# lookup helper.
visit_tr2["small_area"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "small_area"
)
visit_tr2["genre"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "genre"
)
visit_tr2["period"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "period"
)

# Pre-group the frames to reduce calculation load.
# NOTE(review): the meaning of the trailing boolean is defined by
# Grouping.to_group, which is not visible here -- confirm before changing.
visit = Grouping.to_group(
    visit_tr2, ["USER_ID", "genre", "small_area", "period"], True
)
candidate = Grouping.to_group(
    coupon_tr, ["genre", "small_area", "period"], True
)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# Numerator: Grouping.to_group_count applied to a copy of the grouped visits.
numer = Grouping.to_group_count(
    visit.copy(), ["USER_ID", "genre", "small_area", "period"]
)

# Denominator: `candidate` merged with `active` on the shared "period"
# column, then passed through the same counting helper.
denom = Grouping.to_group_count(
    candidate.merge(active, on="period"),
    ["USER_ID", "genre", "small_area", "period"],
)

# Keep rows whose genre is in Utility.spot_genre and whose small_area is
# not listed in Utility.prefs (i.e. not the same name as a prefecture),
# then expose the area column as "sarea" on both frames.
numer_mask = (
    numer["genre"].isin(Utility.spot_genre)
    & ~numer["small_area"].isin(Utility.prefs)
)
numer = numer[numer_mask].rename(columns={"small_area": "sarea"})

denom_mask = (
    denom["genre"].isin(Utility.spot_genre)
    & ~denom["small_area"].isin(Utility.prefs)
)
denom = denom[denom_mask].rename(columns={"small_area": "sarea"})

# probability dataframe ------------------------------------