# For each day, compute per-feat_1 historical counts using only rows strictly
# before that day, so the derived features carry no same-day leakage.
for day in tqdm(range(18, 26)):
    # `_all`: number of impressions of this feat_1 value before `day`
    count = (temp.groupby([feat_1])
                 .apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].count())
                 .reset_index(name=feat_1 + '_all'))
    # `_1`: number of trades (conversions) of this feat_1 value before `day`
    count1 = (temp.groupby([feat_1])
                  .apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].sum())
                  .reset_index(name=feat_1 + '_1'))
    count[feat_1 + '_1'] = count1[feat_1 + '_1']
    # TODO: should the first day's conversion count and sum be handled specially?
    count.fillna(value=0, inplace=True)
    count['context_date_day'] = day
    res = pd.concat([res, count], ignore_index=True)

# only smooth item_id and item_brand_id here
if feat_1 == 'item_id':
    print('smoothing item_id')
    bs_item = BayesianSmoothing(1, 1)
    bs_item.update(res[feat_1 + '_all'].values, res[feat_1 + '_1'].values, 1000, 0.001)
    res[feat_1 + '_smooth'] = (res[feat_1 + '_1'] + bs_item.alpha) / \
        (res[feat_1 + '_all'] + bs_item.alpha + bs_item.beta)
if feat_1 == 'item_brand_id':
    print('smoothing item_brand_id')
    bs_brand = BayesianSmoothing(1, 1)
    bs_brand.update(res[feat_1 + '_all'].values, res[feat_1 + '_1'].values, 1000, 0.001)
    res[feat_1 + '_smooth'] = (res[feat_1 + '_1'] + bs_brand.alpha) / \
        (res[feat_1 + '_all'] + bs_brand.alpha + bs_brand.beta)

# raw conversion rate for all features
res[feat_1 + '_rate'] = res[feat_1 + '_1'] / res[feat_1 + '_all']
train = train.merge(res, how='left', on=[feat_1, 'context_date_day'])
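# The BayesianSmoothing class used above is not defined in this section. Below
# is a minimal sketch of one common implementation (Minka's fixed-point update
# for fitting a Beta(alpha, beta) prior to per-id impression/trade counts),
# written to match the update(tries, successes, n_iter, epsilon) call signature
# used here; the actual class in this project may differ.
import scipy.special as special

class BayesianSmoothing(object):
    def __init__(self, alpha, beta):
        self.alpha = alpha
        self.beta = beta

    def update(self, tries, success, iter_num, epsilon):
        # Iterate the fixed-point equations until alpha and beta converge.
        for _ in range(iter_num):
            new_alpha, new_beta = self.__fixed_point_iteration(
                tries, success, self.alpha, self.beta)
            if abs(new_alpha - self.alpha) < epsilon and \
                    abs(new_beta - self.beta) < epsilon:
                break
            self.alpha, self.beta = new_alpha, new_beta

    def __fixed_point_iteration(self, tries, success, alpha, beta):
        sum_alpha = 0.0
        sum_beta = 0.0
        sum_all = 0.0
        for imp, clk in zip(tries, success):
            sum_alpha += special.digamma(clk + alpha) - special.digamma(alpha)
            sum_beta += special.digamma(imp - clk + beta) - special.digamma(beta)
            sum_all += special.digamma(imp + alpha + beta) - special.digamma(alpha + beta)
        return alpha * (sum_alpha / sum_all), beta * (sum_beta / sum_all)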
for day in tqdm(range(18, 26)):
    count = (temp.groupby([feat_1])
                 .apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].count())
                 .reset_index(name=feat_1 + '_all'))
    count1 = (temp.groupby([feat_1])
                  .apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].sum())
                  .reset_index(name=feat_1 + '_1'))
    count[feat_1 + '_1'] = count1[feat_1 + '_1']
    # TODO: should the first day's conversion count and sum be handled specially?
    count.fillna(value=0, inplace=True)
    count['context_date_day'] = day
    res = pd.concat([res, count], ignore_index=True)

# only smooth shop_id here
if feat_1 == 'shop_id':
    print('smoothing shop_id')
    bs = BayesianSmoothing(1, 1)
    bs.update(res[feat_1 + '_all'].values, res[feat_1 + '_1'].values, 1000, 0.001)
    res[feat_1 + '_smooth'] = (res[feat_1 + '_1'] + bs.alpha) / (
        res[feat_1 + '_all'] + bs.alpha + bs.beta)

# raw conversion rate for all features
res[feat_1 + '_rate'] = res[feat_1 + '_1'] / res[feat_1 + '_all']
train = train.merge(res, how='left', on=[feat_1, 'context_date_day'])
test = test.merge(res, how='left', on=[feat_1, 'context_date_day'])
if feat_1 == 'shop_id':
    # shop_ids with no history get the prior mean alpha / (alpha + beta)
    train['shop_id_smooth'] = train['shop_id_smooth'].fillna(
        value=bs.alpha / (bs.alpha + bs.beta))
    test['shop_id_smooth'] = test['shop_id_smooth'].fillna(
        value=bs.alpha / (bs.alpha + bs.beta))
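# Why the smoothed rate and the prior-mean fill above help: under a
# Beta(alpha, beta) prior, the posterior-mean conversion rate is
# (trades + alpha) / (views + alpha + beta), which shrinks low-traffic ids
# toward the global mean alpha / (alpha + beta) while leaving high-traffic
# ids almost unchanged. A small illustration with hypothetical counts and
# alpha=2, beta=67 (close to the item_category_1 values fitted below):

def smooth_rate(trades, views, alpha, beta):
    # Posterior mean of the conversion rate under a Beta(alpha, beta) prior.
    return (trades + alpha) / (views + alpha + beta)

# smooth_rate(1, 2, 2.0, 67.0)       ~ 0.042  (raw rate would be 0.50)
# smooth_rate(500, 10000, 2.0, 67.0) ~ 0.050  (raw rate is 0.05)
# smooth_rate(0, 0, 2.0, 67.0)       ~ 0.029  == the fillna prior mean above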
for day in tqdm(range(18, 26)):
    count = (temp.groupby(['item_category_1'])
                 .apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].count())
                 .reset_index(name='item_category_1' + '_all'))
    count1 = (temp.groupby(['item_category_1'])
                  .apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].sum())
                  .reset_index(name='item_category_1' + '_1'))
    count['item_category_1' + '_1'] = count1['item_category_1' + '_1']
    # TODO: should the first day's conversion count and sum be handled specially?
    count.fillna(value=0, inplace=True)
    count['context_date_day'] = day
    res = pd.concat([res, count], ignore_index=True)

print('smoothing category_id')
bs = BayesianSmoothing(1, 1)
bs.update(res['item_category_1' + '_all'].values,
          res['item_category_1' + '_1'].values, 1000, 0.001)
res['item_category_1' + '_smooth'] = (res['item_category_1' + '_1'] + bs.alpha) / \
    (res['item_category_1' + '_all'] + bs.alpha + bs.beta)
print('item_category_1', bs.alpha, bs.beta)
# fitted values: item_category_1 2.16580301337 66.9451051993

# raw conversion rate
res['item_category_1' + '_rate'] = res['item_category_1' + '_1'] / res['item_category_1' + '_all']
res.to_pickle('../features/concat_cate_smt_ctr_feature_304.p')

# ================================================
# property part
# ================================================