Ejemplo n.º 1
0
 def killAll():
     '''Terminate every recorded process, then empty the record list.

     Walks ``App._totalapps``, calls ``terminate()`` on a ``util.Process``
     built from each entry's ``ProcessId``, and finally clears the list
     in place so the same list object stays bound to ``App._totalapps``.
     '''
     from util import Process
     recorded = App._totalapps
     for entry in recorded:
         handle = Process(entry.ProcessId)
         handle.terminate()
     # Slice deletion empties the existing list instead of rebinding it.
     del recorded[:]
Ejemplo n.º 2
0
def add_past_purchase_key(_df):
    """
        Append a "past_key" column: the user's past purchases of the same couponkey.

          - The value stored for (USER_ID, key1) in the module-level
            `past_buy_key` table is looked up for every row of `_df`.
          - The value stored for (USER_ID, key1, period) in
            `past_buy_key_period` is subtracted, which excludes the
            information from the period the coupon itself is in.
          - Keys absent from a table come back as NaN and are counted as 0.

        Returns a new frame made of `_df` with "past_key" concatenated
        column-wise; `_df` itself is not modified here.
    """
    def _lookup(table, keys):
        # Align the table to the requested keys; missing keys become NaN -> 0.
        found = table.reindex(keys).values
        return np.where(np.isnan(found), 0, found)

    coupon_key = Process.to_key1(_df)
    user_key = zip(_df.USER_ID, coupon_key)
    user_key_period = zip(_df.USER_ID, coupon_key, _df.period)

    overall, in_period = past_buy_key, past_buy_key_period

    past = _lookup(overall, user_key) - _lookup(in_period, user_key_period)

    past_frame = pd.DataFrame(past, index=_df.index, columns=["past_key"])
    return pd.concat([_df, past_frame], axis=1)
def add_past_purchase_key(_df):
    """
        Add the purchased-couponkey feature as a "past_key" column.

          - Purchased couponkey means purchases of coupons with the same
            couponkey by the user, read from the module-level
            `past_buy_key` table keyed by (USER_ID, key1).
          - The entry of `past_buy_key_period` keyed by
            (USER_ID, key1, period) is subtracted so the period the coupon
            is in does not contribute.
          - A key missing from either table is treated as 0.

        Returns `_df` and the new column concatenated along axis 1.
    """
    coupon_key = Process.to_key1(_df)
    by_user_key = zip(_df.USER_ID, coupon_key)
    by_user_key_period = zip(_df.USER_ID, coupon_key, _df.period)

    all_periods = past_buy_key
    own_period = past_buy_key_period

    # reindex() yields NaN for keys the table does not contain.
    total = all_periods.reindex(by_user_key).values
    same_period = own_period.reindex(by_user_key_period).values

    total = np.where(np.isnan(total), 0, total)
    same_period = np.where(np.isnan(same_period), 0, same_period)

    feature = pd.DataFrame(
        total - same_period,
        index=_df.index,
        columns=["past_key"],
    )
    return pd.concat([_df, feature], axis=1)
Ejemplo n.º 4
0
# Load the pickled training tables.
detail_tr = pd.read_pickle("../model/detail_tr.pkl")
coupons = pd.read_pickle("../model/coupons.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
users = pd.read_pickle("../model/users.pkl")

# count purchases for each coupon ---------------------------

# Exclude duplication: keep one row per (COUPON_ID, USER_ID) pair.
# size().reset_index() names the size column 0, which is dropped again.
detail_tr = detail_tr.groupby(["COUPON_ID","USER_ID"]).size().reset_index().drop(0, axis=1)

# Rows per coupon after de-duplication (i.e. distinct users per coupon),
# merged with the coupon_tr columns on COUPON_ID.
detail_cp = detail_tr.groupby("COUPON_ID").size().reset_index().rename(columns={0:"count"})
detail_cp = detail_cp.merge(coupon_tr, on="COUPON_ID")
# DataFrame.sort() was deprecated and later removed from pandas;
# sort_values() is its replacement for sorting by a column.
detail_cp = detail_cp.sort_values("count")
detail_cp["key1"] = Process.to_key1(detail_cp)

# calculate couponkey and genre popularity ------------------

# couponkey popularity, calculated as mean (plus the group size)
popular_key = detail_cp.groupby("key1")["count"].agg([np.mean, np.size])
popular_key = popular_key.rename(columns={"mean":"key_mean", "size":"key_size"})

# genre popularity, calculated as mean (plus the group size)
# NOTE(review): the genre table reuses the "key_mean"/"key_size" column
# names of the couponkey table -- confirm this is intended.
popular_genre = detail_cp.groupby("genre")["count"].agg([np.mean, np.size])
popular_genre = popular_genre.rename(columns={"mean":"key_mean", "size":"key_size"})

# couponkey popularity, exclude couponkeys with only one sample
popular_key_train = popular_key[popular_key["key_size"] > 1]

# set popularity for each coupon ------------------------
Ejemplo n.º 5
0
from util import Grouping, Process
from util_logger import get_logger
LOG = get_logger()
LOG.info("start b14")

# Read the pickled input tables.
users = pd.read_pickle("../model/users.pkl")
coupons = pd.read_pickle("../model/coupons.pkl")
coupon_tr = pd.read_pickle("../model/coupon_tr.pkl")
detail_tr = pd.read_pickle("../model/detail_tr.pkl")

# Group a copy of the purchase details by (USER_ID, COUPON_ID), then attach
# the coupon_tr columns with a left join and derive the couponkey column.
# NOTE(review): the trailing False/True flag of Grouping.to_group presumably
# switches a count column off/on -- confirm against util.Grouping.
bought = Grouping.to_group(
    detail_tr.copy(), ["USER_ID", "COUPON_ID"], False
).merge(coupon_tr, on="COUPON_ID", how='left')
bought["key1"] = Process.to_key1(bought)

# Remove duplication: regroup by (USER_ID, period, key1).
bought = Grouping.to_group(bought, ["USER_ID", "period", "key1"], False)

# Table per (user, couponkey), indexed by those two columns.
past_buy_key = Grouping.to_group(
    bought, ["USER_ID", "key1"], True
).set_index(["USER_ID", "key1"])

# Table per (user, couponkey, period), indexed by those three columns.
past_buy_key_period = Grouping.to_group(
    bought, ["USER_ID", "key1", "period"], True
).set_index(["USER_ID", "key1", "period"])

# Persist both tables as pickles.
past_buy_key.to_pickle("../model/past_buy_key.pkl")
past_buy_key_period.to_pickle("../model/past_buy_key_period.pkl")