Example 1
    for day in tqdm(range(18, 26)):
        # impressions of each feat_1 value on the days before `day`
        count = temp.groupby([feat_1]).apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].count()).\
            reset_index(name=feat_1 + '_all')
        # conversions (is_trade == 1) of each feat_1 value on the days before `day`
        count1 = temp.groupby([feat_1]).apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].sum()).\
            reset_index(name=feat_1 + '_1')
        count[feat_1 + '_1'] = count1[feat_1 + '_1']
        # TODO: should the first day's conversion count and sum be handled separately?
        count.fillna(value=0, inplace=True)
        count['context_date_day'] = day
        res = res.append(count, ignore_index=True)

    # only smooth item_id and item_brand_id here
    if feat_1 == 'item_id':
        print('smoothing item_id')
        bs_item = BayesianSmoothing(1, 1)
        bs_item.update(res[feat_1 + '_all'].values, res[feat_1 + '_1'].values,
                       1000, 0.001)
        res[feat_1 + '_smooth'] = (res[feat_1 + '_1'] + bs_item.alpha) / \
                                  (res[feat_1 + '_all'] + bs_item.alpha + bs_item.beta)

    if feat_1 == 'item_brand_id':
        print('smoothing item_brand_id')
        bs_brand = BayesianSmoothing(1, 1)
        bs_brand.update(res[feat_1 + '_all'].values, res[feat_1 + '_1'].values,
                        1000, 0.001)
        res[feat_1 + '_smooth'] = (res[feat_1 + '_1'] + bs_brand.alpha) / \
                                  (res[feat_1 + '_all'] + bs_brand.alpha + bs_brand.beta)

    # all features conversion rate
    res[feat_1 + '_rate'] = res[feat_1 + '_1'] / res[feat_1 + '_all']
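
The `BayesianSmoothing` class used above is not defined anywhere in these snippets. A minimal sketch consistent with how it is called here -- constructed as `BayesianSmoothing(1, 1)`, fitted with `update(tries, successes, iter_num, epsilon)`, and then read through its `alpha`/`beta` attributes -- is the usual fixed-point fit of a Beta prior to the per-key (impressions, conversions) counts. The helper method name and the use of `scipy.special.digamma` are assumptions, not taken from the source.

import scipy.special as special

class BayesianSmoothing(object):
    """Fit a Beta(alpha, beta) prior to per-key (impressions, conversions) counts."""

    def __init__(self, alpha, beta):
        self.alpha = alpha
        self.beta = beta

    def update(self, tries, success, iter_num, epsilon):
        # iterate the fixed-point equations until alpha and beta stop moving
        for _ in range(iter_num):
            new_alpha, new_beta = self._fixed_point_iteration(
                tries, success, self.alpha, self.beta)
            if abs(new_alpha - self.alpha) < epsilon and abs(new_beta - self.beta) < epsilon:
                break
            self.alpha, self.beta = new_alpha, new_beta

    def _fixed_point_iteration(self, tries, success, alpha, beta):
        # fixed-point update derived from the Beta-Binomial likelihood
        numer_alpha = (special.digamma(success + alpha) - special.digamma(alpha)).sum()
        numer_beta = (special.digamma(tries - success + beta) - special.digamma(beta)).sum()
        denom = (special.digamma(tries + alpha + beta) - special.digamma(alpha + beta)).sum()
        return alpha * (numer_alpha / denom), beta * (numer_beta / denom)

With a prior fitted this way, the `(x_1 + alpha) / (x_all + alpha + beta)` expression above is simply the posterior mean of the conversion rate, which is what the `_smooth` column stores.
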
Example 2
    for day in tqdm(range(18, 26)):
        count = temp.groupby([feat_1]).apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].count()).\
            reset_index(name=feat_1 + '_all')
        count1 = temp.groupby([feat_1]).apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].sum()).\
            reset_index(name=feat_1 + '_1')
        count[feat_1 + '_1'] = count1[feat_1 + '_1']
        # TODO: should the first day's conversion count and sum be handled separately?
        count.fillna(value=0, inplace=True)
        count['context_date_day'] = day
        res = res.append(count, ignore_index=True)

    # only smooth shop_id here
    if feat_1 == 'shop_id':
        print('smoothing shop_id')
        bs = BayesianSmoothing(1, 1)
        bs.update(res[feat_1 + '_all'].values, res[feat_1 + '_1'].values, 1000,
                  0.001)
        res[feat_1 + '_smooth'] = (res[feat_1 + '_1'] + bs.alpha) / (
            res[feat_1 + '_all'] + bs.alpha + bs.beta)

    # all features conversion rate
    res[feat_1 + '_rate'] = res[feat_1 + '_1'] / res[feat_1 + '_all']

    train = train.merge(res, how='left', on=[feat_1, 'context_date_day'])
    test = test.merge(res, how='left', on=[feat_1, 'context_date_day'])

    if feat_1 == 'shop_id':
        train['shop_id_smooth'] = train['shop_id_smooth'].fillna(
            value=bs.alpha / (bs.alpha + bs.beta))
        test['shop_id_smooth'] = test['shop_id_smooth'].fillna(
            value=bs.alpha / (bs.alpha + bs.beta))
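
To make the effect of the `_smooth` column concrete compared with the raw `_rate`, here is a small worked example; the counts and the prior below are made up purely for illustration.

import numpy as np

alpha, beta = 2.0, 60.0           # hypothetical prior, as fitted by BayesianSmoothing
views = np.array([4, 4000])       # impressions of two keys before the current day
trades = np.array([2, 200])       # conversions of the same two keys

raw_rate = trades / views                             # [0.5, 0.05]   -- the tiny sample looks great
smoothed = (trades + alpha) / (views + alpha + beta)  # [0.061, 0.050] -- shrunk toward the prior
cold_start = alpha / (alpha + beta)                   # ~0.032, the fallback used in the fillna above

The low-traffic key is pulled strongly toward the prior mean while the high-traffic key barely moves; keys with no history at all get exactly the prior mean, which is what the `fillna` on `shop_id_smooth` above does.
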
temp = tmp_category[['item_category_1', 'context_date_day', 'is_trade']]

for day in tqdm(range(18, 26)):
    count = temp.groupby(['item_category_1']).apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].count()).\
        reset_index(name='item_category_1' + '_all')
    count1 = temp.groupby(['item_category_1']).apply(lambda x: x['is_trade'][(x['context_date_day'] < day).values].sum()).\
        reset_index(name='item_category_1' + '_1')
    count['item_category_1' + '_1'] = count1['item_category_1' + '_1']

    # TODO: should the first day's conversion count and sum be handled separately?
    count.fillna(value=0, inplace=True)
    count['context_date_day'] = day
    res = res.append(count, ignore_index=True)

print('smoothing category_id')
bs = BayesianSmoothing(1, 1)
bs.update(res['item_category_1' + '_all'].values,
          res['item_category_1' + '_1'].values, 1000, 0.001)
res['item_category_1' + '_smooth'] = (res['item_category_1' + '_1'] + bs.alpha) / \
                                     (res['item_category_1' + '_all'] + bs.alpha + bs.beta)
print('item_category_1', bs.alpha, bs.beta)
# item_category_1 2.16580301337 66.9451051993

# conversion rate
res['item_category_1' +
    '_rate'] = res['item_category_1' + '_1'] / res['item_category_1' + '_all']

res.to_pickle('../features/concat_cate_smt_ctr_feature_304.p')
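
The category snippet stops at the pickle dump. Presumably these features are later merged back onto the train/test frames the same way the shop_id block in Example 2 does it; a hedged sketch of that step, assuming `train`, `test`, and the fitted `bs` from the snippets above are still in scope, would be:

import pandas as pd

cate_feat = pd.read_pickle('../features/concat_cate_smt_ctr_feature_304.p')

# join the per-day cumulative features on the same keys used to build them
train = train.merge(cate_feat, how='left', on=['item_category_1', 'context_date_day'])
test = test.merge(cate_feat, how='left', on=['item_category_1', 'context_date_day'])

# categories with no history before a given day fall back to the prior mean,
# mirroring the shop_id_smooth fillna in Example 2
prior_mean = bs.alpha / (bs.alpha + bs.beta)
train['item_category_1_smooth'] = train['item_category_1_smooth'].fillna(value=prior_mean)
test['item_category_1_smooth'] = test['item_category_1_smooth'].fillna(value=prior_mean)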

# ================================================
#               property part