def killAll():
    """Terminate every recorded process, then empty the record list."""
    from util import Process

    for recorded_app in App._totalapps:
        Process(recorded_app.ProcessId).terminate()

    # Slice deletion empties the existing list object in place instead of
    # rebinding the attribute to a new list.
    del App._totalapps[:]
def add_past_purchase_key(_df):
    """
    Append a "past_key" column: purchases of the same couponkey by the user.

    - Looks up the module-level table ``past_buy_key`` by (USER_ID, key1)
      and ``past_buy_key_period`` by (USER_ID, key1, period).
    - The per-period value is subtracted from the overall value, so the
      information from the row's own period is excluded.
    - Index entries missing from either table count as 0.

    Returns a new frame: ``_df`` with the "past_key" column concatenated.
    """
    coupon_keys = Process.to_key1(_df)

    # NOTE(review): on Python 3 these are lazy zip iterators rather than
    # lists of tuples -- confirm the installed pandas accepts them in reindex.
    by_user_key = zip(_df.USER_ID, coupon_keys)
    by_user_key_period = zip(_df.USER_ID, coupon_keys, _df.period)

    # Overall value per (user, couponkey); NaN marks a missing entry.
    overall = past_buy_key.reindex(by_user_key).values
    overall = np.where(np.isnan(overall), 0, overall)

    # Value restricted to the row's own period.
    same_period = past_buy_key_period.reindex(by_user_key_period).values
    same_period = np.where(np.isnan(same_period), 0, same_period)

    past_key = pd.DataFrame(
        overall - same_period,
        index=_df.index,
        columns=["past_key"],
    )
    return pd.concat([_df, past_key], axis=1)
# load files
detail_tr = pd.read_pickle("../model/detail_tr.pkl")
coupons = pd.read_pickle("../model/coupons.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
users = pd.read_pickle("../model/users.pkl")

# count purchases for each coupon ---------------------------
# exclude duplication: keep a single row per (COUPON_ID, USER_ID) pair
detail_tr = (
    detail_tr.groupby(["COUPON_ID", "USER_ID"])
    .size()
    .reset_index()
    .drop(0, axis=1)  # the unnamed size column (label 0) is not needed
)
# number of remaining (deduplicated) rows per coupon
detail_cp = (
    detail_tr.groupby("COUPON_ID")
    .size()
    .reset_index()
    .rename(columns={0: "count"})
)
detail_cp = detail_cp.merge(coupon_tr, on="COUPON_ID")
# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement.
detail_cp = detail_cp.sort_values("count")
detail_cp["key1"] = Process.to_key1(detail_cp)

# calculate couponkey and genre popularity ------------------
# couponkey popularity, calculated as mean
# (string aggregator names give the "mean"/"size" columns renamed below)
popular_key = detail_cp.groupby("key1")["count"].agg(["mean", "size"])
popular_key = popular_key.rename(columns={"mean": "key_mean", "size": "key_size"})

# genre popularity, calculated as mean
# NOTE(review): the genre table reuses the "key_mean"/"key_size" column
# names -- confirm this is what the code consuming it expects.
popular_genre = detail_cp.groupby("genre")["count"].agg(["mean", "size"])
popular_genre = popular_genre.rename(columns={"mean": "key_mean", "size": "key_size"})

# couponkey popularity, exclude couponkey of only one sample
popular_key_train = popular_key[popular_key["key_size"] > 1]

# set popularity for each coupon ----------------------------
from util import Grouping, Process
from util_logger import get_logger

LOG = get_logger()
LOG.info("start b14")

# load files
users = pd.read_pickle("../model/users.pkl")
coupons = pd.read_pickle("../model/coupons.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
detail_tr = pd.read_pickle("../model/detail_tr.pkl")

# group a copy of the purchase log by (user, coupon), then attach the
# coupon attributes and derive the couponkey
# (the third to_group argument presumably toggles a count column -- TODO confirm)
bought = Grouping.to_group(detail_tr.copy(), ["USER_ID", "COUPON_ID"], False)
bought = bought.merge(coupon_tr, on="COUPON_ID", how='left')
bought["key1"] = Process.to_key1(bought)

# remove duplication
bought = Grouping.to_group(bought, ["USER_ID", "period", "key1"], False)

# count purchases per (user, couponkey), indexed by those columns
past_buy_key = Grouping.to_group(
    bought, ["USER_ID", "key1"], True
).set_index(["USER_ID", "key1"])

# same count, additionally split by period
past_buy_key_period = Grouping.to_group(
    bought, ["USER_ID", "key1", "period"], True
).set_index(["USER_ID", "key1", "period"])

# write
past_buy_key.to_pickle("../model/past_buy_key.pkl")
past_buy_key_period.to_pickle("../model/past_buy_key_period.pkl")