예제 #1
0
            'DBD': ['max', 'mean', 'sum'],
            'AMT_INSTALMENT_sub_AMT_PAYMENT': ['max', 'mean', 'sum', 'var'],
            'AMT_INSTALMENT_div_AMT_PAYMENT': ['max', 'mean', 'sum', 'var'],
            'AMT_INSTALMENT': ['max', 'mean', 'sum'],
            'AMT_PAYMENT': ['min', 'max', 'mean', 'sum'],
            'DAYS_ENTRY_PAYMENT': ['max', 'mean', 'sum']
        }
        via = df.groupby('SK_ID_PREV').agg(aggs).reset_index(drop=True)
        via.columns = [
            '_'.join(f) if f[0] != 'SK_ID_CURR' else f[0] for f in via.columns
        ]
        self.df = via.groupby('SK_ID_CURR').mean()


# Script entry point: load the ID columns of train/test and the INST table,
# normalise the INST columns, then build the dataset.
if __name__ == '__main__':
    # NOTE(review): the label passed here is 'POS CASH' although this script
    # reads INST below -- possibly a copy-paste leftover; confirm what
    # get_arguments uses the label for.
    args = get_arguments('POS CASH')
    with timer('load dataset'):
        # Only the SK_ID_CURR column of train/test is kept.
        train = pd.read_feather(TRAIN)[['SK_ID_CURR']]
        test = pd.read_feather(TEST)[['SK_ID_CURR']]
        inst = pd.read_feather(INST)

    with timer('preprocessing'):
        # inst.drop(['SK_ID_PREV'], axis=1, inplace=True)
        # Order rows by SK_ID_CURR, then DAYS_INSTALMENT, and renumber the
        # index so it follows the sorted order.
        inst = inst.sort_values(['SK_ID_CURR',
                                 'DAYS_INSTALMENT']).reset_index(drop=True)
        # Apply log1p to every column whose name starts with 'AMT_'.
        inst.loc[:, inst.columns.str.startswith('AMT_')] = np.log1p(
            inst.filter(regex='^(AMT_)'))
        # In every column whose name starts with 'DAYS_', replace the value
        # 365243 with NaN (presumably a "missing" sentinel -- TODO confirm).
        inst.loc[:, inst.columns.str.startswith('DAYS_')] = inst.filter(
            regex='^DAYS_').replace({365243: np.nan})

    with timer('create dataset'):
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from feat import get_arguments

# Scripts to execute, in this order. The names are relative, so they are
# resolved against the current working directory of this process.
FILES = [
    'main.py',
    'bureau.py',
    'credit_card_balance.py',
    'installments_payment.py',
    'pos_cash_balance.py',
    'previous_application.py',
]


def build_command(script, force=False):
    """Return the argv list used to run *script* as a child process.

    Args:
        script: Path of the Python script to execute.
        force: When true, ``-f`` is appended so the flag is forwarded to
            the child script.

    Returns:
        A list ``[interpreter, '-u', script]`` with an optional trailing
        ``'-f'``. ``-u`` asks the child interpreter for unbuffered output.
    """
    # Prefer the interpreter running this script so the children use the same
    # environment; fall back to 'python' if it cannot be determined.
    cmd = [sys.executable or 'python', '-u', script]
    if force:
        cmd.append('-f')
    return cmd


if __name__ == '__main__':
    import subprocess

    args = get_arguments('all')
    failed = []
    for script in FILES:
        # Passing an argv list (no shell) avoids quoting problems and shell
        # interpretation of the script path.
        result = subprocess.run(build_command(script, args.force))
        if result.returncode != 0:
            # Keep going so the remaining scripts still run, but make the
            # failure visible instead of silently discarding it.
            print('{} exited with status {}'.format(script, result.returncode),
                  file=sys.stderr)
            failed.append(script)
    if failed:
        sys.exit(1)