Exemple #1
0
def get_genre_week_exp_feat(label, key, n_day, data_dict):
    """Exponentially weighted visitor mean per (genre, day-of-week).

    Rows of ``data_dict['data']`` whose ``visit_date`` lies strictly between
    ``date_add_days(key[0], -n_day)`` and ``key[0]`` are weighted by
    ``0.985 ** age``, where ``age = diff_of_days(key[0], visit_date)``.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``air_genre_name`` and ``dow``.
        key: Sequence whose first element is the reference date.
        n_day: Size of the look-back window handed to ``date_add_days``; also
            used as the suffix of the generated column names.
        data_dict: Mapping with a ``'data'`` frame that provides the columns
            ``visit_date``, ``visitors``, ``air_genre_name`` and ``dow``.

    Returns:
        The result of ``left_merge`` with missing values replaced by 0. The
        aggregated frame handed to ``left_merge`` carries
        ``genre_dow_exp_mean<n_day>`` and ``genre_dow_exp_weight_sum<n_day>``.
    """
    group_keys = ['air_genre_name', 'dow']
    mean_col = 'genre_dow_exp_mean{}'.format(n_day)
    weight_col = 'genre_dow_exp_weight_sum{}'.format(n_day)

    data = data_dict['data']
    start_date = date_add_days(key[0], -n_day)
    in_window = (data.visit_date < key[0]) & (data.visit_date > start_date)
    data_temp = data[in_window].copy()

    age = data_temp['visit_date'].apply(lambda x: diff_of_days(key[0], x))
    data_temp['weight'] = 0.985 ** age
    data_temp['weighted_visitors'] = (data_temp['visitors'] *
                                      data_temp['weight'])

    # ``agg({new_name: func})`` on a single column was removed in pandas 1.0;
    # summing both columns and renaming works on old and new versions alike.
    result = data_temp.groupby(
        group_keys, as_index=False)[['weighted_visitors', 'weight']].sum()
    result = result.rename(columns={
        'weighted_visitors': mean_col,
        'weight': weight_col,
    })
    # Weighted sum divided by the sum of weights gives the weighted mean.
    result[mean_col] = result[mean_col] / result[weight_col]
    return left_merge(label, result, on=group_keys).fillna(0)
Exemple #2
0
def get_store_holiday_feat(label, key, n_day, data_dict):
    """Visitor statistics per store, split by each of the two holiday flags.

    Only rows of ``data_dict['data']`` whose ``visit_date`` lies strictly
    between ``date_add_days(key[0], -n_day)`` and ``key[0]`` are used.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``store_id`` plus ``holiday_flg`` / ``holiday_flg2``.
        key: Sequence whose first element is the reference date.
        n_day: Size of the look-back window handed to ``date_add_days``; also
            used as the suffix of the generated column names.
        data_dict: Mapping with a ``'data'`` frame that provides the columns
            ``visit_date``, ``visitors``, ``store_id``, ``holiday_flg`` and
            ``holiday_flg2``.

    Returns:
        Column-wise concatenation of the two ``left_merge`` results (first
        the ``store_holiday_*`` block, then ``store_holiday2_*``), each with
        missing values replaced by 0.
    """
    stat_names = ['min', 'mean', 'median', 'max', 'count', 'std', 'skew']

    data = data_dict['data']
    start_date = date_add_days(key[0], -n_day)
    in_window = (data.visit_date < key[0]) & (data.visit_date > start_date)
    data_temp = data[in_window].copy()

    parts = []
    for flag_col, prefix in (('holiday_flg', 'store_holiday'),
                             ('holiday_flg2', 'store_holiday2')):
        group_keys = ['store_id', flag_col]
        renames = {
            stat: '{}_{}{}'.format(prefix, stat, n_day)
            for stat in stat_names
        }
        # ``agg({new_name: func})`` on a single column was removed in
        # pandas 1.0; a list of functions followed by a rename is portable.
        stats = data_temp.groupby(group_keys)['visitors'].agg(stat_names)
        stats = stats.rename(columns=renames).reset_index()
        parts.append(left_merge(label, stats, on=group_keys).fillna(0))
    return pd.concat(parts, axis=1)
Exemple #3
0
def get_first_last_time_feat(label, key, n_day, data_dict):
    """Distance from the reference date to each store's first and last visit.

    Only rows of ``data_dict['data']`` whose ``visit_date`` lies strictly
    between ``date_add_days(key[0], -n_day)`` and ``key[0]`` are used.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``store_id``.
        key: Sequence whose first element is the reference date.
        n_day: Size of the look-back window handed to ``date_add_days``.
        data_dict: Mapping with a ``'data'`` frame that provides the columns
            ``visit_date`` and ``store_id``.

    Returns:
        The result of ``left_merge`` with missing values replaced by 0. The
        frame handed to ``left_merge`` is indexed by ``store_id`` and holds
        ``first_time`` (``diff_of_days`` to the earliest visit date in the
        window) and ``last_time`` (same for the latest visit date).
    """
    data = data_dict['data']
    start_date = date_add_days(key[0], -n_day)
    in_window = (data.visit_date < key[0]) & (data.visit_date > start_date)
    data_temp = data[in_window].copy()

    # ``agg({new_name: func})`` on a single column was removed in pandas 1.0.
    # min/max are order independent, so no prior sort is required.
    visit_dates = data_temp.groupby('store_id')['visit_date']
    result = pd.DataFrame({
        'first_time':
        visit_dates.min().apply(lambda d: diff_of_days(key[0], d)),
        'last_time':
        visit_dates.max().apply(lambda d: diff_of_days(key[0], d)),
    })
    return left_merge(label, result, on=['store_id']).fillna(0)
Exemple #4
0
def get_store_week_exp_feat(label, key, n_day, data_dict):
    """Exponentially weighted visitor means per (store, day-of-week).

    For every decay factor in a fixed list, rows inside the look-back window
    are weighted by ``decay ** age`` with
    ``age = diff_of_days(key[0], visit_date)``.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``store_id`` and ``dow``.
        key: Sequence whose first element is the reference date.
        n_day: Size of the look-back window handed to ``date_add_days``; also
            used in the generated column names.
        data_dict: Mapping with a ``'data'`` frame that provides the columns
            ``visit_date``, ``visitors``, ``store_id`` and ``dow``.

    Returns:
        The result of ``left_merge`` with missing values replaced by 0. For
        each decay the aggregated frame carries, in this order,
        ``store_dow_exp_mean<n_day>_<decay>``,
        ``store_dow_exp_weight_sum<n_day>_<decay>`` and
        ``store_dow_exp_mean2<n_day>_<decay>``.
    """
    group_keys = ['store_id', 'dow']
    decays = [0.9, 0.95, 0.97, 0.98, 0.985, 0.99, 0.999, 0.9999]

    data = data_dict['data']
    start_date = date_add_days(key[0], -n_day)
    in_window = (data.visit_date < key[0]) & (data.visit_date > start_date)
    data_temp = data[in_window].copy()

    age = data_temp['visit_date'].apply(lambda x: diff_of_days(key[0], x))
    data_temp['visitors2'] = data_temp['visitors']

    result = None
    for decay in decays:
        mean_col = 'store_dow_exp_mean{}_{}'.format(n_day, decay)
        mean2_col = 'store_dow_exp_mean2{}_{}'.format(n_day, decay)
        weight_col = 'store_dow_exp_weight_sum{}_{}'.format(n_day, decay)

        data_temp['weight'] = decay ** age
        data_temp['visitors1'] = data_temp['visitors'] * data_temp['weight']
        # NOTE(review): ``visitors2`` is updated in place, so it accumulates
        # the weights of every earlier decay as well. Kept as-is because the
        # "mean2" features depend on it -- confirm this is intentional.
        data_temp['visitors2'] = data_temp['visitors2'] * data_temp['weight']

        # ``agg({new_name: func})`` on a single column was removed in
        # pandas 1.0; one multi-column sum plus a rename is portable.
        sums = data_temp.groupby(
            group_keys,
            as_index=False)[['visitors1', 'weight', 'visitors2']].sum()
        sums = sums.rename(columns={
            'visitors1': mean_col,
            'weight': weight_col,
            'visitors2': mean2_col,
        })
        sums[mean_col] = sums[mean_col] / sums[weight_col]
        sums[mean2_col] = sums[mean2_col] / sums[weight_col]

        if result is None:
            result = sums
        else:
            result = result.merge(sums, on=group_keys, how='left')
    return left_merge(label, result, on=group_keys).fillna(0)
Exemple #5
0
def get_store_week_diff_feat(label, key, n_day, data_dict):
    """Summarise date-to-date changes in visitors for every store.

    Rows of ``data_dict['data']`` whose ``visit_date`` lies strictly between
    ``date_add_days(key[0], -n_day)`` and ``key[0]`` are pivoted into a
    store x visit_date table of ``visitors``; differences between adjacent
    date columns are then reduced per store.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``store_id``.
        key: Sequence whose first element is the reference date.
        n_day: Size of the look-back window handed to ``date_add_days``.
        data_dict: Mapping with a ``'data'`` frame that provides the columns
            ``visit_date``, ``visitors`` and ``store_id``.

    Returns:
        The result of ``left_merge`` with missing values replaced by 0. The
        frame handed to ``left_merge`` holds ``store_diff_mean`` (mean of the
        absolute differences), ``store_diff_std``, ``store_diff_max`` and
        ``store_diff_min``.
    """
    frame = data_dict['data']
    window_start = date_add_days(key[0], -n_day)
    in_window = ((frame.visit_date < key[0])
                 & (frame.visit_date > window_start))
    recent = frame[in_window].copy()

    # One row per store, one column per visit date.
    wide = recent.set_index(['store_id', 'visit_date'])['visitors'].unstack()
    # The first column of a diff is all-NaN by construction, so drop it.
    deltas = wide.diff(axis=1).iloc[:, 1:]

    # Compute every statistic before any feature column is attached, so the
    # reductions only ever see the date columns.
    features = [
        ('store_diff_mean', np.abs(deltas).mean(axis=1)),
        ('store_diff_std', deltas.std(axis=1)),
        ('store_diff_max', deltas.max(axis=1)),
        ('store_diff_min', deltas.min(axis=1)),
    ]
    for name, values in features:
        deltas[name] = values

    wanted = [name for name, _ in features]
    merged = left_merge(label, deltas[wanted], on=['store_id'])
    return merged.fillna(0)
Exemple #6
0
def get_store_visitor_feat(label, key, n_day, data_dict):
    """Per-store visitor statistics over the look-back window.

    Only rows of ``data_dict['data']`` whose ``visit_date`` lies strictly
    between ``date_add_days(key[0], -n_day)`` and ``key[0]`` are used.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``store_id``.
        key: Sequence whose first element is the reference date.
        n_day: Size of the look-back window handed to ``date_add_days``; also
            used as the suffix of the generated column names.
        data_dict: Mapping with a ``'data'`` frame that provides the columns
            ``visit_date``, ``visitors`` and ``store_id``.

    Returns:
        The result of ``left_merge`` with missing values replaced by 0. The
        aggregated frame carries ``store_<stat><n_day>`` for the statistics
        min, mean, median, max, count, std and skew.
    """
    stat_names = ['min', 'mean', 'median', 'max', 'count', 'std', 'skew']
    renames = {
        stat: 'store_{}{}'.format(stat, n_day)
        for stat in stat_names
    }

    data = data_dict['data']
    start_date = date_add_days(key[0], -n_day)
    in_window = (data.visit_date < key[0]) & (data.visit_date > start_date)
    data_temp = data[in_window].copy()

    # ``agg({new_name: func})`` on a single column was removed in pandas 1.0;
    # a list of functions followed by a rename is portable.
    result = data_temp.groupby(['store_id'])['visitors'].agg(stat_names)
    result = result.rename(columns=renames).reset_index()
    return left_merge(label, result, on=['store_id']).fillna(0)
Exemple #7
0
def get_store_all_week_feat(label, key, n_day, data_dict):
    """Per-store visitor statistics for each of the seven ``dow`` values.

    Statistics are computed per (store, dow) inside the look-back window and
    then laid out side by side: the block for ``dow == i`` has every column
    name prefixed with ``str(i)``.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``store_id``.
        key: Sequence whose first element is the reference date.
        n_day: Size of the look-back window handed to ``date_add_days``; also
            used as the suffix of the generated column names.
        data_dict: Mapping with a ``'data'`` frame that provides the columns
            ``visit_date``, ``visitors``, ``store_id`` and ``dow``.

    Returns:
        Column-wise concatenation of seven ``left_merge`` results (dow 0..6),
        each with missing values replaced by 0.
    """
    # NOTE(review): the column called ``store_dow_sum`` holds the *max*, as
    # in the original code. Name and values are kept unchanged so downstream
    # consumers are unaffected -- confirm which of the two was intended.
    renames = {
        'mean': 'store_dow_mean{}'.format(n_day),
        'median': 'store_dow_median{}'.format(n_day),
        'max': 'store_dow_sum{}'.format(n_day),
        'count': 'store_dow_count{}'.format(n_day),
    }

    data = data_dict['data']
    start_date = date_add_days(key[0], -n_day)
    in_window = (data.visit_date < key[0]) & (data.visit_date > start_date)
    data_temp = data[in_window].copy()

    # ``agg({new_name: func})`` on a single column was removed in pandas 1.0;
    # a list of functions followed by a rename is portable.
    result_temp = data_temp.groupby(['store_id', 'dow'])['visitors'].agg(
        ['mean', 'median', 'max', 'count'])
    result_temp = result_temp.rename(columns=renames).reset_index()

    frames = []
    for i in range(7):
        result_sub = result_temp[result_temp['dow'] == i]
        # The ``dow`` column itself is kept and becomes ``<i>dow``.
        result_sub = result_sub.set_index('store_id').add_prefix(str(i))
        frames.append(
            left_merge(label, result_sub, on=['store_id']).fillna(0))
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(frames, axis=1)
Exemple #8
0
def _select_open_reservations(reserve, start_date, end_date):
    """Return reservations made before ``start_date`` for visits in
    ``[start_date, end_date)``, with a ``diff_time`` lead-time column."""
    mask = ((reserve.visit_date >= start_date)
            & (reserve.visit_date < end_date)
            & (reserve.reserve_date < start_date))
    selected = reserve[mask].copy()
    # Derive the lead time from the selected rows themselves so the values
    # can never be aligned against a different index.
    selected['diff_time'] = (
        pd.to_datetime(selected['visit_datetime']) -
        pd.to_datetime(selected['reserve_datetime'])).dt.days
    return selected


def _reserve_aggregates(reserve, prefix):
    """Aggregate reservations per (store, date) and per date; returns
    ``(store_day, day, store_day_lead, day_lead)`` with ``prefix``ed names."""
    by_store_day = reserve.groupby(['store_id', 'visit_date'])
    by_day = reserve.groupby(['visit_date'])

    # ``agg({new_name: func})`` on a single column was removed in pandas 1.0;
    # a list of functions followed by a rename is portable.
    store_day = by_store_day['reserve_visitors'].agg(['sum', 'count'])
    store_day = store_day.rename(columns={
        'sum': '{}_reserve_visitors'.format(prefix),
        'count': '{}_reserve_count'.format(prefix),
    })
    # Densify: every store gets a row for every date, missing pairs become 0.
    store_day = store_day.unstack().fillna(0).stack()

    day = by_day['reserve_visitors'].agg(['sum', 'count'])
    day = day.rename(columns={
        'sum': '{}_date_visitors'.format(prefix),
        'count': '{}_date_count'.format(prefix),
    })

    store_day_lead = by_store_day['diff_time'].mean().to_frame(
        '{}_store_diff_time_mean'.format(prefix))
    day_lead = by_day['diff_time'].mean().to_frame(
        '{}_diff_time_mean'.format(prefix))
    return store_day, day, store_day_lead, day_lead


def get_reserve_feat(label, key, _, data_dict):
    """Reservation features (air and hpg) for the label period.

    Uses reservations whose ``visit_date`` falls in
    ``[key[0], date_add_days(key[0], key[1]))`` and whose ``reserve_date`` is
    before ``key[0]``. An inline note in the original code gives
    ``'2017-04-23'`` and ``'2017-05-31'`` as example values for the two
    bounds.

    Fixes compared with the previous implementation:

    * ``diff_time`` for air reservations was computed from the unfiltered
      ``air_reserve`` frame and assigned after a merge had reset the index,
      which paired lead times with the wrong rows.
    * ``air_store`` was merged twice, duplicating its columns with suffixes.
      A single inner merge is kept, so reservations whose ``store_id`` is
      absent from ``air_store`` are still dropped (this assumes ``store_id``
      is unique in ``air_store`` -- TODO confirm).
    * ``data_dict['data']`` was read but never used.

    Args:
        label: Frame passed to ``left_merge`` as the left side; it is joined
            on ``store_id`` and/or ``visit_date``.
        key: Sequence; ``key[0]`` is the first date of the label period and
            ``key[1]`` the offset handed to ``date_add_days`` to obtain its
            exclusive end.
        _: Unused.
        data_dict: Mapping with the frames ``'air_reserve'``,
            ``'hpg_reserve'`` (columns ``store_id``, ``visit_date``,
            ``reserve_date``, ``visit_datetime``, ``reserve_datetime``,
            ``reserve_visitors``) and ``'air_store'`` (column ``store_id``).

    Returns:
        Column-wise concatenation of eight ``left_merge`` results, each with
        missing values replaced by 0, in this order: air store/date counts,
        hpg store/date counts, air per-date counts, hpg per-date counts,
        air store/date mean lead time, hpg store/date mean lead time,
        air per-date mean lead time, hpg per-date mean lead time.
    """
    air_reserve = data_dict['air_reserve']
    hpg_reserve = data_dict['hpg_reserve']
    air_store = data_dict['air_store']

    label_end_date = date_add_days(key[0], key[1])

    air_reserve_temp = _select_open_reservations(air_reserve, key[0],
                                                 label_end_date)
    air_reserve_temp = air_reserve_temp.merge(air_store, on='store_id')
    hpg_reserve_temp = _select_open_reservations(hpg_reserve, key[0],
                                                 label_end_date)

    (air_result, air_date_result, air_store_diff_time_mean,
     air_diff_time_mean) = _reserve_aggregates(air_reserve_temp, 'air')
    (hpg_result, hpg_date_result, hpg_store_diff_time_mean,
     hpg_diff_time_mean) = _reserve_aggregates(hpg_reserve_temp, 'hpg')

    store_day_keys = ['store_id', 'visit_date']
    day_keys = ['visit_date']
    # (frame, join keys) in the column order of the returned result.
    pieces = [
        (air_result, store_day_keys),
        (hpg_result, store_day_keys),
        (air_date_result, day_keys),
        (hpg_date_result, day_keys),
        (air_store_diff_time_mean, store_day_keys),
        (hpg_store_diff_time_mean, store_day_keys),
        (air_diff_time_mean, day_keys),
        (hpg_diff_time_mean, day_keys),
    ]
    merged = [
        left_merge(label, frame, on=list(keys)).fillna(0)
        for frame, keys in pieces
    ]
    return pd.concat(merged, axis=1)