"""b11b: build numerator/denominator tables of purchases per user and
(small_area, genre, period) candidate coupons (chunk truncated at '# numer')."""
import pandas as pd  # fix: pd was used below but never imported

from util import Utility, Grouping
from util_logger import get_logger

LOG = get_logger()
LOG.info("start b11b")

# load preprocessed inputs
detail_tr = pd.read_pickle("../model/detail_tr.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
users = pd.read_pickle("../model/users.pkl")

# calculate numer and denom --------------------------------
# remove duplicates: collapse purchase details to one row per (user, coupon)
bought = detail_tr.copy()
bought = Grouping.to_group(bought, ["USER_ID", "COUPON_ID"], False)

# denom: for every (user, period) in which the user bought anything,
# attach the candidate coupon counts per (small_area, genre) of that period
active = bought.copy()
active = active.merge(coupon_tr, on=["COUPON_ID"])
active = Grouping.to_group(active, ["USER_ID", "period"], False)
cpntr2 = Grouping.to_group(coupon_tr, ["small_area", "genre", "period"], True)
denom = active.copy()
denom = denom.merge(users, on="USER_ID")
denom = denom[["USER_ID", "period"]]
denom = denom.merge(cpntr2, on=["period"])
denom = denom[["USER_ID", "small_area", "genre", "count", "period"]].copy()
# numer
# attach the genre-price attribute to the training coupons
coupons_price = pd.read_pickle("../model/coupons_price.pkl")
coupon_tr = coupon_tr.merge(coupons_price, on="COUPON_ID")

# calculate numer and denom --------------------------------
# collapse the visit log to one row per (user, coupon);
# drop(0, axis=1) removes the count column produced by groupby().size()
visit_tr2 = (visit_tr.groupby(["USER_ID", "COUPON_ID"])
             .size().reset_index().drop(0, axis=1))

# look up coupon attributes for every visited coupon
for col in ["genreprice", "period"]:
    visit_tr2[col] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr, col)

# pre-aggregate so the joins below stay small
visit = Grouping.to_group(visit_tr2, ["USER_ID", "genreprice", "period"], True)
candidate = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# numerator: visit counts per (user, genreprice, period)
numer = Grouping.to_group_count(visit.copy(),
                                ["USER_ID", "genreprice", "period"])

# denominator: candidate coupons offered in periods where the user was active
denom = Grouping.to_group_count(candidate.merge(active, on="period"),
                                ["USER_ID", "genreprice", "period"])

# probability dataframe ------------------------------------
# pivot numer/denom into the per-user genreprice probability table
visit_pivot_genreprice = Grouping.to_pivotdf(numer, denom, "genreprice")
# collapse the visit log to one row per (user, coupon);
# drop(0, axis=1) removes the count column produced by groupby().size()
visit_tr2 = (visit_tr.groupby(["USER_ID", "COUPON_ID"])
             .size().reset_index().drop(0, axis=1))

# look up coupon attributes for every visited coupon
for col in ["small_area", "genre", "period"]:
    visit_tr2[col] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr, col)

# pre-aggregate so the joins below stay small
visit = Grouping.to_group(visit_tr2,
                          ["USER_ID", "genre", "small_area", "period"], True)
candidate = Grouping.to_group(coupon_tr, ["genre", "small_area", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# numerator: visit counts per (user, genre, small_area, period)
numer = Grouping.to_group_count(visit.copy(),
                                ["USER_ID", "genre", "small_area", "period"])

# denominator: candidate coupons offered in periods where the user was active
denom = Grouping.to_group_count(candidate.merge(active, on="period"),
                                ["USER_ID", "genre", "small_area", "period"])

# exclude non-spot genre and same name with prefecture
# load preprocessed model inputs
users = pd.read_pickle("../model/users.pkl")
coupons = pd.read_pickle("../model/coupons.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
coupon_te = pd.read_pickle("../model/coupon_te.pkl")
detail_tr = pd.read_pickle("../model/detail_tr.pkl")

# train data ---------------------------------
# restrict training data to spot genres only
coupon_tr2 = coupon_tr[coupon_tr.genre.isin(Utility.spot_genre)].copy()
detail_tr2 = detail_tr[detail_tr.COUPON_ID.isin(coupon_tr2.COUPON_ID)].copy()

# one row per (user, coupon) pair where a purchase occurred
bought = Grouping.to_group(detail_tr2.copy(), ["USER_ID", "COUPON_ID"], False)

# one row per (user, period) in which the user made a purchase
active = Grouping.to_group(
    bought.copy().merge(coupon_tr2[["COUPON_ID", "period"]], on=["COUPON_ID"]),
    ["USER_ID", "period"], False)


def random_index(all_count, sample_count, seed):
    """Return sample_count positions out of all_count, reproducibly.

    Seeds numpy's global RNG with *seed*, draws all_count uniform values
    and returns the indices of the sample_count smallest ones.
    """
    np.random.seed(seed)
    noise = np.random.rand(all_count)
    return np.argsort(noise)[:sample_count]


# negative samples - coupons * users active in the period
traind = coupon_tr2[["COUPON_ID", "period"]].copy()
"""b11b: build numerator/denominator tables of purchases per user and
(small_area, genre, period) candidate coupons (chunk truncated at '# numer')."""
import pandas as pd  # fix: pd was used below but never imported
import numpy as np

from util import Utility, Grouping
from util_logger import get_logger

LOG = get_logger()
LOG.info("start b11b")

# load preprocessed inputs
detail_tr = pd.read_pickle("../model/detail_tr.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
users = pd.read_pickle("../model/users.pkl")

# calculate numer and denom --------------------------------
# remove duplicates: collapse purchase details to one row per (user, coupon)
bought = detail_tr.copy()
bought = Grouping.to_group(bought, ["USER_ID", "COUPON_ID"], False)

# denom: for every (user, period) in which the user bought anything,
# attach the candidate coupon counts per (small_area, genre) of that period
active = bought.copy()
active = active.merge(coupon_tr, on=["COUPON_ID"])
active = Grouping.to_group(active, ["USER_ID", "period"], False)
cpntr2 = Grouping.to_group(coupon_tr, ["small_area", "genre", "period"], True)
denom = active.copy()
denom = denom.merge(users, on="USER_ID")
denom = denom[["USER_ID", "period"]]
denom = denom.merge(cpntr2, on=["period"])
denom = denom[["USER_ID", "small_area", "genre", "count", "period"]].copy()
# numer
"""b14: count past purchases per user and coupon key (key1)."""
import pandas as pd
import numpy as np
from util import Grouping, Process
from util_logger import get_logger

LOG = get_logger()
LOG.info("start b14")

# load preprocessed model inputs
users = pd.read_pickle("../model/users.pkl")
coupons = pd.read_pickle("../model/coupons.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
detail_tr = pd.read_pickle("../model/detail_tr.pkl")

# one row per (user, coupon) purchase, joined with coupon attributes,
# then labelled with the coupon key
bought = Grouping.to_group(detail_tr.copy(),
                           ["USER_ID", "COUPON_ID"], False).merge(
    coupon_tr, on="COUPON_ID", how='left')
bought["key1"] = Process.to_key1(bought)
# deduplicate down to one row per (user, period, key1)
bought = Grouping.to_group(bought, ["USER_ID", "period", "key1"], False)

# purchase counts per (user, key1), indexed for lookup
past_buy_key = Grouping.to_group(bought,
                                 ["USER_ID", "key1"], True).set_index(
    ["USER_ID", "key1"])
# same counts, additionally broken down by period
past_buy_key_period = Grouping.to_group(
    bought, ["USER_ID", "key1", "period"], True).set_index(
    ["USER_ID", "key1", "period"])

# write
past_buy_key.to_pickle("../model/past_buy_key.pkl")
# load files users = pd.read_pickle("../model/users.pkl") coupons = pd.read_pickle("../model/coupons.pkl") coupon_tr = pd.read_pickle("../model/coupon_tr.pkl") coupon_te = pd.read_pickle("../model/coupon_te.pkl") detail_tr = pd.read_pickle("../model/detail_tr.pkl") # train data --------------------------------- # exclude non_spot genre from training data coupon_tr2 = coupon_tr[coupon_tr.genre.isin(Utility.spot_genre)].copy() detail_tr2 = detail_tr[detail_tr.COUPON_ID.isin(coupon_tr2.COUPON_ID)].copy() # user/coupon pairs, purchase occured bought = detail_tr2.copy() bought = Grouping.to_group(bought, ["USER_ID", "COUPON_ID"], False) # user/period pairs, purchase occured in the period active = bought.copy() active = active.merge(coupon_tr2[["COUPON_ID", "period"]], on=["COUPON_ID"]) active = Grouping.to_group(active, ["USER_ID", "period"], False) def random_index(all_count, sample_count, seed): """Create integer list randomly, sample_count out of all_count.""" np.random.seed(seed) rd = np.random.rand(all_count) idxes = np.argsort(rd) return idxes[:sample_count]