Esempio n. 1
0
def last_k_instalment_features_with_fractions(gr, periods, fraction_periods):
    """Return short-window / long-window ratios of instalment aggregates.

    For each value in ``periods`` the rows whose ``days`` value is at most
    that period are aggregated over the ``installments`` and
    ``purchase_amount`` columns via ``utils.add_features_in_group``, using
    the prefix ``last_<period>_``. For each ``(short, long)`` pair in
    ``fraction_periods`` the feature names of both windows are fetched with
    ``utils._get_feature_names``, paired positionally, and divided with
    ``utils.safe_div``.

    Arguments:
        gr: frame of rows exposing ``days``, ``installments`` and
            ``purchase_amount`` columns.
        periods: iterable of window limits compared against ``days``.
        fraction_periods: iterable of ``(short_period, long_period)``
            pairs. Presumably both values must also occur in ``periods``
            so that their aggregates exist -- confirm against
            ``utils._get_feature_names``.

    Returns:
        pd.Series keyed by fraction feature name. Only the ratios are
        returned; the per-window aggregates are intermediate values.
    """
    frame = gr.copy()

    aggregate_specs = (
        ('installments', ('mean', 'var', 'skew', 'kurt', 'iqr')),
        ('purchase_amount',
         ('sum', 'max', 'mean', 'var', 'skew', 'kurt', 'iqr')),
    )

    # Per-window aggregates, only used as numerators/denominators below.
    window_stats = {}
    for period in periods:
        window = frame[frame['days'] <= period]
        prefix = 'last_{}_'.format(period)
        for column, aggregations in aggregate_specs:
            window_stats = utils.add_features_in_group(
                window_stats, window, column, list(aggregations), prefix)

    ratios = {}
    for short_period, long_period in fraction_periods:
        short_names = utils._get_feature_names(window_stats, short_period)
        long_names = utils._get_feature_names(window_stats, long_period)

        # The ratio feature reuses the short-window name with the period
        # chunk swapped for '<short>by<long>_fraction'.
        short_chunk = '_{}_'.format(short_period)
        ratio_chunk = '_{}by{}_fraction_'.format(short_period, long_period)

        for short_name, long_name in zip(short_names, long_names):
            ratio_name = short_name.replace(short_chunk, ratio_chunk)
            ratios[ratio_name] = utils.safe_div(
                window_stats[short_name], window_stats[long_name])

    return pd.Series(ratios)
def _installments_last_loan_features(gr):
    """Compute aggregate features restricted to the most recent loan.

    The rows are ordered by ``DAYS_INSTALMENT`` descending; the
    ``SK_ID_PREV`` of the first row identifies the last loan, and only the
    rows carrying that id are aggregated.

    Arguments:
        gr: rows of a single group (usually one customer id). Must expose
            the ``DAYS_INSTALMENT``, ``SK_ID_PREV``, ``DPD``,
            ``LATE_PAYMENT``, ``PAID_OVER_AMOUNT`` and ``PAID_OVER``
            columns.

    Returns:
        features: dictionary with features, built by
        ``utils.add_features_in_group`` with the ``LAST_LOAN_`` prefix.
    """
    ordered = gr.sort_values(['DAYS_INSTALMENT'], ascending=False)
    newest_loan_id = ordered['SK_ID_PREV'].iloc[0]
    last_loan = ordered[ordered['SK_ID_PREV'] == newest_loan_id]

    aggregate_plan = (
        ('DPD', ('sum', 'mean', 'max', 'std')),
        ('LATE_PAYMENT', ('count', 'mean')),
        ('PAID_OVER_AMOUNT', ('sum', 'mean', 'max', 'min', 'std')),
        ('PAID_OVER', ('count', 'mean')),
    )

    features = {}
    for column, aggregations in aggregate_plan:
        features = utils.add_features_in_group(
            features, last_loan, column, list(aggregations), 'LAST_LOAN_')
    return features
Esempio n. 3
0
def last_k_instalment_features(gr, periods):
    """Aggregate instalment columns over several ``days`` windows.

    For each value in ``periods`` the rows whose ``days`` value is at most
    that period are aggregated over ``installments`` and
    ``purchase_amount`` via ``utils.add_features_in_group``, using the
    prefix ``hist_last_<period>_``.

    Arguments:
        gr: frame of rows exposing ``days``, ``installments`` and
            ``purchase_amount`` columns.
        periods: iterable of window limits compared against ``days``.

    Returns:
        features: dictionary accumulated across all periods.
    """
    frame = gr.copy()

    aggregate_specs = (
        ('installments', ('max', 'mean', 'var', 'skew')),
        ('purchase_amount', ('sum', 'max', 'min', 'mean', 'var', 'skew')),
    )

    features = {}
    for period in periods:
        window = frame[frame['days'] <= period]
        prefix = 'hist_last_{}_'.format(period)
        for column, aggregations in aggregate_specs:
            features = utils.add_features_in_group(
                features, window, column, list(aggregations), prefix)
    return features
def last_link_time_features(gr, periods):
    """Aggregate link columns over the trailing rows of a group.

    The rows are reversed, so taking the first ``period`` rows of the
    reversed frame selects the last ``period`` rows of ``gr``. A period
    greater than ``10e5`` (1,000,000) selects every row instead and uses
    the ``zsl_all_`` prefix.

    Arguments:
        gr: frame of rows exposing ``link_time`` and
            ``link_current_status`` columns.
        periods: iterable of row counts; values up to ``10e5`` are used as
            positional slice bounds.

    Returns:
        features: dictionary accumulated by ``add_features_in_group``
        across all periods.
    """
    reversed_rows = gr.copy().iloc[::-1]

    aggregate_specs = (
        ('link_time', ('max', 'sum', 'mean', 'min', 'skew', 'std')),
        ('link_current_status', ('mean', 'nunique')),
    )

    features = {}
    for period in periods:
        if period > 10e5:
            prefix, window = 'zsl_all_', reversed_rows.copy()
        else:
            prefix = 'zsl_last_{}_'.format(period)
            window = reversed_rows.iloc[:period]

        for column, aggregations in aggregate_specs:
            features = add_features_in_group(
                features, window, column, list(aggregations), prefix)
    return features
def last_k_link_time_interval(gr, periods):
    """Aggregate row-to-row changes of the link columns over trailing rows.

    The rows are reversed, then two difference columns are added:
    ``t_i_v`` (``link_time``) and ``c_s_v`` (``link_current_status``).
    Each is the difference from the preceding row in reversed order, with
    the first row's missing value replaced by 0. Fully duplicated rows are
    then dropped and the index is reset. For each period the first
    ``period`` rows of that frame are aggregated; a period greater than
    ``10e5`` (1,000,000) aggregates every row.

    Arguments:
        gr: frame of rows exposing ``link_time`` and
            ``link_current_status`` columns.
        periods: iterable of row counts; values up to ``10e5`` are used as
            positional slice bounds.

    Returns:
        features: dictionary accumulated by ``add_features_in_group``;
        all ``t_i_v`` features are added before any ``c_s_v`` feature.
    """
    reversed_rows = gr.copy().iloc[::-1]
    reversed_rows['t_i_v'] = reversed_rows['link_time'].diff().fillna(0)
    reversed_rows['c_s_v'] = (
        reversed_rows['link_current_status'].diff().fillna(0))

    deduped = reversed_rows.drop_duplicates().reset_index(drop=True)

    # NOTE(review): the '..._all' prefixes have no trailing underscore,
    # unlike the '..._last_<k>_' ones. They are kept as-is because existing
    # feature/column names may depend on them -- confirm before changing.
    interval_plan = (
        # change in link time
        ('t_i_v', ('mean', 'max', 'min', 'std', 'skew', 'sum'),
         'zsl_link_time_interval_all',
         'zsl_link_time_interval_last_{}_'),
        # change in current status
        ('c_s_v', ('mean', 'std', 'skew'),
         'zsl_link_current_status_interval_all',
         'zsl_link_current_status_interval_last_{}_'),
    )

    features = {}
    for column, aggregations, all_prefix, last_template in interval_plan:
        for period in periods:
            if period > 10e5:
                prefix, window = all_prefix, deduped.copy()
            else:
                prefix = last_template.format(period)
                window = deduped.iloc[:period]
            features = add_features_in_group(
                features, window, column, list(aggregations), prefix)
    return features