Ejemplo n.º 1
0
from util import Utility, Grouping
from util_logger import get_logger

# b11b: builds the per-user denominator table over (small_area, genre, period).
LOG = get_logger()
LOG.info("start b11b")

# Read the three pickled input tables, in the same order as before.
detail_tr, coupon_tr, users = map(
    pd.read_pickle,
    (
        "../model/detail_tr.pkl",
        "../model/coupon_tr.pkl",
        "../model/users.pkl",
    ),
)

# calculate numer and denom --------------------------------

# Purchases grouped by (USER_ID, COUPON_ID); the original note describes this
# step as duplicate removal.
bought = Grouping.to_group(detail_tr.copy(), ["USER_ID", "COUPON_ID"], False)

# denom
# Attach the coupon columns to every purchased pair, then regroup the result
# by (USER_ID, period).
active = Grouping.to_group(
    bought.copy().merge(coupon_tr, on=["COUPON_ID"]),
    ["USER_ID", "period"],
    False,
)

# Coupons grouped by (small_area, genre, period). The True flag presumably
# adds the "count" column selected below -- TODO confirm in util.Grouping.
cpntr2 = Grouping.to_group(coupon_tr, ["small_area", "genre", "period"], True)

# Keep the (USER_ID, period) rows whose user is present in `users`, then pair
# each of them with every coupon group of the same period.
denom = (
    active.copy()
    .merge(users, on="USER_ID")[["USER_ID", "period"]]
    .merge(cpntr2, on=["period"])[
        ["USER_ID", "small_area", "genre", "count", "period"]
    ]
    .copy()
)

# numer
# add genre-price information
# Merge the pickled coupons_price table into coupon_tr on COUPON_ID (pandas'
# default inner join). The "genreprice" column used below presumably comes
# from this table -- TODO confirm.
coupons_price = pd.read_pickle("../model/coupons_price.pkl")
coupon_tr = coupon_tr.merge(coupons_price, on="COUPON_ID")

# calculate numer and denom --------------------------------

# remove duplicate
# One row per distinct (USER_ID, COUPON_ID) pair: size() counts the rows of
# each pair, reset_index() stores that count in a column named 0, and the
# drop() discards it again.
# NOTE(review): visit_tr is not defined in the visible lines -- it must be
# loaded before this point.
visit_tr2 = visit_tr.groupby(["USER_ID", "COUPON_ID"]).size().reset_index().drop(0, axis=1)

# add information
# Presumably looks up each row's coupon in coupon_tr and returns the named
# coupon column -- verify against util.Grouping.lookup_coupon_element.
visit_tr2["genreprice"] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr,"genreprice")
visit_tr2["period"] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr, "period")

# group to reduce calculation load
# The third argument of to_group is opaque here (True for visit/candidate,
# False for active); presumably it toggles a count column -- TODO confirm.
visit = Grouping.to_group(visit_tr2, ["USER_ID", "genreprice", "period"], True)
candidate = Grouping.to_group(coupon_tr, ["genreprice", "period"], True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# numer
# Built from the per-user visit groups, keyed by (USER_ID, genreprice, period).
numer = visit.copy()
numer = Grouping.to_group_count(numer, ["USER_ID", "genreprice", "period"])

# denom
# Pair every coupon group with every (USER_ID, period) row of the same period,
# then aggregate on the same keys as numer.
denom = candidate.merge(active, on="period")
denom = Grouping.to_group_count(denom, ["USER_ID", "genreprice", "period"])

# probability dataframe ------------------------------------

# create pivoted probability dataframe
# Combines numer and denom via Grouping.to_pivotdf with "genreprice" as the
# third argument (presumably the pivot column -- verify in util.Grouping).
visit_pivot_genreprice = Grouping.to_pivotdf(numer, denom, "genreprice")
Ejemplo n.º 3
0
# remove duplicate
# One row per distinct (USER_ID, COUPON_ID) pair: size() counts the rows of
# each pair, reset_index() stores that count in a column named 0, and the
# drop() discards it again.
# NOTE(review): visit_tr is not defined in the visible lines -- it must be
# loaded before this point.
visit_tr2 = (visit_tr.groupby(["USER_ID",
                               "COUPON_ID"]).size().reset_index().drop(0,
                                                                       axis=1))

# add information
# Presumably looks up each row's coupon in coupon_tr and returns the named
# coupon column -- verify against util.Grouping.lookup_coupon_element.
visit_tr2["small_area"] = Grouping.lookup_coupon_element(
    visit_tr2, coupon_tr, "small_area")
visit_tr2["genre"] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr,
                                                    "genre")
visit_tr2["period"] = Grouping.lookup_coupon_element(visit_tr2, coupon_tr,
                                                     "period")

# group to reduce calculation load
# The third argument of to_group is opaque here (True for visit/candidate,
# False for active); presumably it toggles a count column -- TODO confirm.
visit = Grouping.to_group(visit_tr2,
                          ["USER_ID", "genre", "small_area", "period"], True)
candidate = Grouping.to_group(coupon_tr, ["genre", "small_area", "period"],
                              True)
active = Grouping.to_group(visit_tr2, ["USER_ID", "period"], False)

# numer
# Built from the per-user visit groups, keyed by
# (USER_ID, genre, small_area, period).
numer = visit.copy()
numer = Grouping.to_group_count(numer,
                                ["USER_ID", "genre", "small_area", "period"])

# denom
# Pair every coupon group with every (USER_ID, period) row of the same period,
# then aggregate on the same keys as numer.
denom = candidate.merge(active, on="period")
denom = Grouping.to_group_count(denom,
                                ["USER_ID", "genre", "small_area", "period"])

# exclude non-spot genre and same name with prefecture
# Read the pickled input tables, in the same order as before.
users, coupons, coupon_tr, coupon_te, detail_tr = map(
    pd.read_pickle,
    (
        "../model/users.pkl",
        "../model/coupons.pkl",
        "../model/coupon_tr.pkl",
        "../model/coupon_te.pkl",
        "../model/detail_tr.pkl",
    ),
)

# train data ---------------------------------

# Keep only the training coupons whose genre is listed in Utility.spot_genre,
# and only the purchase details that refer to one of those coupons.
coupon_tr2 = coupon_tr.loc[coupon_tr["genre"].isin(Utility.spot_genre)].copy()
detail_tr2 = detail_tr.loc[
    detail_tr["COUPON_ID"].isin(coupon_tr2["COUPON_ID"])
].copy()

# user/coupon pairs for which a purchase occurred
bought = Grouping.to_group(detail_tr2.copy(), ["USER_ID", "COUPON_ID"], False)

# user/period pairs: attach each bought coupon's period, then regroup by
# (USER_ID, period)
active = Grouping.to_group(
    bought.copy().merge(coupon_tr2[["COUPON_ID", "period"]], on=["COUPON_ID"]),
    ["USER_ID", "period"],
    False,
)

def random_index(all_count, sample_count, seed):
    """Return ``sample_count`` distinct random indices from ``range(all_count)``.

    The global NumPy random generator is re-seeded with ``seed`` on every
    call, so the result is reproducible for a given ``(all_count, seed)``
    pair. As a side effect, NumPy's global random state is reset.

    Args:
        all_count: Size of the index range to sample from.
        sample_count: Number of indices to return. Values larger than
            ``all_count`` yield all ``all_count`` indices.
        seed: Seed passed to ``np.random.seed``.

    Returns:
        A 1-D integer array holding the first ``sample_count`` entries of a
        random ordering of ``0 .. all_count - 1``.

    Raises:
        ValueError: If ``sample_count`` is negative.
    """
    if sample_count < 0:
        # A negative stop would slice from the end and silently return
        # ``all_count - abs(sample_count)`` indices instead of failing.
        raise ValueError("sample_count must be non-negative")
    np.random.seed(seed)
    # Ranking uniform draws yields a random permutation of the indices.
    return np.argsort(np.random.rand(all_count))[:sample_count]

# negative samples - coupons * users active in the period
# Only the coupon side is prepared here: a copy of the COUPON_ID and period
# columns of coupon_tr2. The pairing with active users is not part of the
# visible lines.
traind = coupon_tr2[["COUPON_ID","period"]].copy()
import numpy as np
from util import Utility, Grouping
from util_logger import get_logger
# b11b: builds the per-user denominator table over (small_area, genre, period).
LOG = get_logger()
LOG.info("start b11b")

# Read the three pickled input tables, in the same order as before.
detail_tr, coupon_tr, users = map(
    pd.read_pickle,
    (
        "../model/detail_tr.pkl",
        "../model/coupon_tr.pkl",
        "../model/users.pkl",
    ),
)

# calculate numer and denom --------------------------------

# Purchases grouped by (USER_ID, COUPON_ID); the original note describes this
# step as duplicate removal.
bought = Grouping.to_group(detail_tr.copy(), ["USER_ID", "COUPON_ID"], False)

# denom
# Attach the coupon columns to every purchased pair, then regroup the result
# by (USER_ID, period).
active = Grouping.to_group(
    bought.copy().merge(coupon_tr, on=["COUPON_ID"]),
    ["USER_ID", "period"],
    False,
)

# Coupons grouped by (small_area, genre, period). The True flag presumably
# adds the "count" column selected below -- TODO confirm in util.Grouping.
cpntr2 = Grouping.to_group(coupon_tr, ["small_area", "genre", "period"], True)

# Keep the (USER_ID, period) rows whose user is present in `users`, then pair
# each of them with every coupon group of the same period.
denom = (
    active.copy()
    .merge(users, on="USER_ID")[["USER_ID", "period"]]
    .merge(cpntr2, on=["period"])[
        ["USER_ID", "small_area", "genre", "count", "period"]
    ]
    .copy()
)

# numer
Ejemplo n.º 6
0
import pandas as pd
import numpy as np
from util import Grouping, Process
from util_logger import get_logger
# b14: purchase history per (user, key1) and per (user, key1, period).
LOG = get_logger()
LOG.info("start b14")

# Read the pickled input tables, in the same order as before.
users, coupons, coupon_tr, detail_tr = map(
    pd.read_pickle,
    (
        "../model/users.pkl",
        "../model/coupons.pkl",
        "../model/coupon_tr.pkl",
        "../model/detail_tr.pkl",
    ),
)

# Group purchases by (USER_ID, COUPON_ID), left-join the coupon columns, and
# derive the "key1" column with Process.to_key1.
bought = Grouping.to_group(
    detail_tr.copy(), ["USER_ID", "COUPON_ID"], False
).merge(coupon_tr, on="COUPON_ID", how='left')
bought["key1"] = Process.to_key1(bought)

# remove duplication at the (USER_ID, period, key1) level
bought = Grouping.to_group(bought, ["USER_ID", "period", "key1"], False)

# purchases per (user, key1) -- per the original note these are counts --
# indexed by the grouping keys
past_buy_key = Grouping.to_group(bought, ["USER_ID", "key1"], True).set_index(
    ["USER_ID", "key1"]
)

# the same table, additionally split by period
# NOTE(review): past_buy_key_period is not written anywhere in the visible
# lines -- confirm it is persisted elsewhere.
past_buy_key_period = Grouping.to_group(
    bought, ["USER_ID", "key1", "period"], True
).set_index(["USER_ID", "key1", "period"])

# write
past_buy_key.to_pickle("../model/past_buy_key.pkl")
# Read the pickled input tables, in the same order as before.
users, coupons, coupon_tr, coupon_te, detail_tr = map(
    pd.read_pickle,
    (
        "../model/users.pkl",
        "../model/coupons.pkl",
        "../model/coupon_tr.pkl",
        "../model/coupon_te.pkl",
        "../model/detail_tr.pkl",
    ),
)

# train data ---------------------------------

# Keep only the training coupons whose genre is listed in Utility.spot_genre,
# and only the purchase details that refer to one of those coupons.
coupon_tr2 = coupon_tr.loc[coupon_tr["genre"].isin(Utility.spot_genre)].copy()
detail_tr2 = detail_tr.loc[
    detail_tr["COUPON_ID"].isin(coupon_tr2["COUPON_ID"])
].copy()

# user/coupon pairs for which a purchase occurred
bought = Grouping.to_group(detail_tr2.copy(), ["USER_ID", "COUPON_ID"], False)

# user/period pairs: attach each bought coupon's period, then regroup by
# (USER_ID, period)
active = Grouping.to_group(
    bought.copy().merge(coupon_tr2[["COUPON_ID", "period"]], on=["COUPON_ID"]),
    ["USER_ID", "period"],
    False,
)


def random_index(all_count, sample_count, seed):
    """Return ``sample_count`` distinct random indices from ``range(all_count)``.

    The global NumPy random generator is re-seeded with ``seed`` on every
    call, so the result is reproducible for a given ``(all_count, seed)``
    pair. As a side effect, NumPy's global random state is reset.

    Args:
        all_count: Size of the index range to sample from.
        sample_count: Number of indices to return. Values larger than
            ``all_count`` yield all ``all_count`` indices.
        seed: Seed passed to ``np.random.seed``.

    Returns:
        A 1-D integer array holding the first ``sample_count`` entries of a
        random ordering of ``0 .. all_count - 1``.

    Raises:
        ValueError: If ``sample_count`` is negative.
    """
    if sample_count < 0:
        # A negative stop would slice from the end and silently return
        # ``all_count - abs(sample_count)`` indices instead of failing.
        raise ValueError("sample_count must be non-negative")
    np.random.seed(seed)
    # Ranking uniform draws yields a random permutation of the indices.
    return np.argsort(np.random.rand(all_count))[:sample_count]