def last_k_instalment_features_with_fractions(gr, periods, fraction_periods):
    """Build short-window / long-window ratio features for one group.

    For every value in ``periods`` the rows with ``days <= period`` are
    aggregated (``installments`` and ``purchase_amount`` columns) through
    ``utils.add_features_in_group`` under the prefix ``last_<period>_``.
    Then, for every ``(short_period, long_period)`` pair, the features that
    ``utils._get_feature_names`` reports for the two periods are paired up
    positionally and divided with ``utils.safe_div``.

    Arguments:
        gr: group of rows with ``days``, ``installments`` and
            ``purchase_amount`` columns (presumably a pandas DataFrame
            produced by a groupby -- confirm against the caller).
        periods: iterable of window bounds compared against ``days``.
        fraction_periods: iterable of ``(short_period, long_period)`` pairs.

    Returns:
        pd.Series holding only the ratio features, keyed by the short
        feature name with ``_<short>_`` replaced by
        ``_<short>by<long>_fraction_``.
    """
    group = gr.copy()

    # (column, aggregations) pairs computed for every window, in call order.
    column_specs = (
        ('installments',
         ('mean', 'var', 'skew', 'kurt', 'iqr')),
        ('purchase_amount',
         ('sum', 'max', 'mean', 'var', 'skew', 'kurt', 'iqr')),
    )

    # Intermediate per-window statistics; these are not returned themselves.
    window_stats = {}
    for period in periods:
        window = group[group['days'] <= period]
        prefix = 'last_{}_'.format(period)
        for column, aggregations in column_specs:
            window_stats = utils.add_features_in_group(
                window_stats, window, column, list(aggregations), prefix)

    ratios = {}
    for short_period, long_period in fraction_periods:
        short_names = utils._get_feature_names(window_stats, short_period)
        long_names = utils._get_feature_names(window_stats, long_period)
        short_chunk = '_{}_'.format(short_period)
        ratio_chunk = '_{}by{}_fraction_'.format(short_period, long_period)
        # Pairing is positional: both name lists are assumed to come back in
        # matching order from the helper.
        for short_name, long_name in zip(short_names, long_names):
            ratio_name = short_name.replace(short_chunk, ratio_chunk)
            ratios[ratio_name] = utils.safe_div(
                window_stats[short_name], window_stats[long_name])

    return pd.Series(ratios)
def _installments_last_loan_features(gr):
    """Aggregate installment statistics for the last loan in a group.

    The rows are ordered by ``DAYS_INSTALMENT`` descending; the
    ``SK_ID_PREV`` of the first row after sorting identifies the last loan,
    and only rows belonging to that loan are aggregated.

    Arguments:
        gr: rows of a single group with ``DAYS_INSTALMENT``, ``SK_ID_PREV``,
            ``DPD``, ``LATE_PAYMENT``, ``PAID_OVER_AMOUNT`` and ``PAID_OVER``
            columns (presumably one customer's installments as a pandas
            DataFrame -- confirm against the caller).

    Returns:
        features: the feature mapping produced by feeding an initially
            empty dict through ``utils.add_features_in_group`` for each
            column, every feature using the ``LAST_LOAN_`` prefix.
    """
    # Sorting returns a new frame, so the caller's group is left untouched.
    ordered = gr.sort_values(['DAYS_INSTALMENT'], ascending=False)
    last_loan_id = ordered['SK_ID_PREV'].iloc[0]
    last_loan = ordered[ordered['SK_ID_PREV'] == last_loan_id]

    # (column, aggregations) pairs, in the order the features are added.
    column_specs = (
        ('DPD', ('sum', 'mean', 'max', 'std')),
        ('LATE_PAYMENT', ('count', 'mean')),
        ('PAID_OVER_AMOUNT', ('sum', 'mean', 'max', 'min', 'std')),
        ('PAID_OVER', ('count', 'mean')),
    )

    features = {}
    for column, aggregations in column_specs:
        features = utils.add_features_in_group(
            features, last_loan, column, list(aggregations), 'LAST_LOAN_')
    return features
def last_k_instalment_features(gr, periods):
    """Aggregate installment and purchase statistics per ``days`` window.

    For every value in ``periods`` the rows with ``days <= period`` are
    passed to ``utils.add_features_in_group`` for the ``installments`` and
    ``purchase_amount`` columns, using the prefix ``hist_last_<period>_``.

    Arguments:
        gr: group of rows with ``days``, ``installments`` and
            ``purchase_amount`` columns (presumably a pandas DataFrame
            produced by a groupby -- confirm against the caller).
        periods: iterable of window bounds compared against ``days``.

    Returns:
        features: the mapping accumulated by ``utils.add_features_in_group``
            starting from an empty dict.
    """
    group = gr.copy()

    # (column, aggregations) pairs computed for every window, in call order.
    column_specs = (
        ('installments', ('max', 'mean', 'var', 'skew')),
        ('purchase_amount', ('sum', 'max', 'min', 'mean', 'var', 'skew')),
    )

    features = {}
    for period in periods:
        window = group[group['days'] <= period]
        prefix = 'hist_last_{}_'.format(period)
        for column, aggregations in column_specs:
            features = utils.add_features_in_group(
                features, window, column, list(aggregations), prefix)
    return features
def last_link_time_features(gr, periods):
    """Aggregate ``link_time`` / ``link_current_status`` over leading rows.

    The group is copied and its row order reversed. For each entry of
    ``periods`` either the first ``period`` rows of the reversed group are
    aggregated (prefix ``zsl_last_<period>_``) or, when ``period`` exceeds
    10e5 (1,000,000), all rows are (prefix ``zsl_all_``).

    Arguments:
        gr: group of rows with ``link_time`` and ``link_current_status``
            columns (presumably a pandas DataFrame -- confirm against the
            caller).
        periods: iterable of row counts; any value above 10e5 means
            "use every row".

    Returns:
        features: the mapping accumulated by ``add_features_in_group``
            starting from an empty dict.
    """
    reversed_rows = gr.copy().iloc[::-1]

    # (column, aggregations) pairs computed for every window, in call order.
    column_specs = (
        ('link_time', ('max', 'sum', 'mean', 'min', 'skew', 'std')),
        ('link_current_status', ('mean', 'nunique')),
    )

    features = {}
    for period in periods:
        if period > 10e5:
            # Sentinel: a huge period selects the whole group.
            prefix = 'zsl_all_'
            window = reversed_rows.copy()
        else:
            prefix = 'zsl_last_{}_'.format(period)
            window = reversed_rows.iloc[:period]
        for column, aggregations in column_specs:
            features = add_features_in_group(
                features, window, column, list(aggregations), prefix)
    return features
def last_k_link_time_interval(gr, periods):
    """Aggregate row-to-row differences of link time and current status.

    The group is copied and its row order reversed, then two columns are
    added: ``t_i_v`` (difference of consecutive ``link_time`` values) and
    ``c_s_v`` (difference of consecutive ``link_current_status`` values),
    with the missing first difference filled with 0. Fully duplicated rows
    are dropped and the index is reset before aggregating.

    All ``t_i_v`` features are added first (one pass over ``periods``),
    followed by all ``c_s_v`` features (a second pass over ``periods``).

    Arguments:
        gr: group of rows with ``link_time`` and ``link_current_status``
            columns (presumably a pandas DataFrame -- confirm against the
            caller).
        periods: iterable of row counts; any value above 10e5 (1,000,000)
            means "use every row".

    Returns:
        features: the mapping accumulated by ``add_features_in_group``
            starting from an empty dict.
    """
    rows = gr.copy().iloc[::-1]
    rows['t_i_v'] = rows['link_time'].diff().fillna(0)
    rows['c_s_v'] = rows['link_current_status'].diff().fillna(0)
    rows = rows.drop_duplicates().reset_index(drop=True)

    # One entry per pass: (column, aggregations, "all rows" prefix,
    # "last k rows" prefix template).
    # NOTE: the "all" prefixes have no trailing underscore, unlike the
    # "last_k" ones; they are kept verbatim because they determine the
    # names of the emitted features.
    passes = (
        # link time change
        ('t_i_v',
         ('mean', 'max', 'min', 'std', 'skew', 'sum'),
         'zsl_link_time_interval_all',
         'zsl_link_time_interval_last_{}_'),
        # current status change
        ('c_s_v',
         ('mean', 'std', 'skew'),
         'zsl_link_current_status_interval_all',
         'zsl_link_current_status_interval_last_{}_'),
    )

    features = {}
    for column, aggregations, all_prefix, last_template in passes:
        for period in periods:
            if period > 10e5:
                # Sentinel: a huge period selects the whole group.
                prefix = all_prefix
                window = rows.copy()
            else:
                prefix = last_template.format(period)
                window = rows.iloc[:period]
            features = add_features_in_group(
                features, window, column, list(aggregations), prefix)
    return features