Example #1
0
def main():
    """Run 5-fold cross-validated training of R2NCNN and write a submission.

    Reads ``seed``, ``onehot``, ``n_hidden``, ``n_main``, ``n_epochs`` and
    ``patience`` from the parsed command-line arguments. One model is fitted
    per fold; whatever ``load_model`` returns for each fold is collected and
    the whole list is handed to ``predict``, whose result is saved as CSV
    under ``../submission/``.
    """
    args = parse_args()
    seed_everything(args.seed)

    # The input directories depend on the categorical encoding in use.
    if args.onehot:
        table_dir, sequence_dir = '../data/05_onehot', '../data/06_onehot_seq'
    else:
        table_dir = '../data/03_powertransform'
        sequence_dir = '../data/04_sequence'
    all_data = read_all(directory=table_dir)
    sequences = read_sequences(directory=sequence_dir)
    dims = get_dims(all_data)
    loader_maker = LoaderMaker(all_data, sequences, args, onehot=args.onehot)

    # CV
    if args.onehot:
        name = '15_cnn-onehot'
    else:
        name = '15_cnn-label'
    applications = all_data['application_train']
    splitter = StratifiedKFold(n_splits=5)
    best_models = []
    for fit_rows, holdout_rows in splitter.split(applications['SK_ID_CURR'],
                                                 applications['TARGET']):
        fit_loader = loader_maker.make(fit_rows)
        holdout_loader = loader_maker.make(holdout_rows)
        network = R2NCNN(dims, args.n_hidden, args.n_main)
        criterion = nn.BCEWithLogitsLoss()
        model = LightningModel(network, criterion, fit_loader, holdout_loader,
                               args)
        trainer = HomeCreditTrainer(name, args.n_epochs, args.patience)
        trainer.fit(model)
        # presumably restores the best checkpoint of this run — confirm
        # against load_model's definition.
        best_models.append(load_model(model, name, trainer.logger.version))

    # Predict
    test_dataloader = loader_maker.make(index=None, train=False)
    df_submission = predict(best_models, test_dataloader)
    # NOTE: the submission stem ("15_r2n-cnn-*") differs from the run name
    # ("15_cnn-*"), so the path is spelled out rather than built from `name`.
    if args.onehot:
        filename = '../submission/15_r2n-cnn-onehot.csv'
    else:
        filename = '../submission/15_r2n-cnn-label.csv'
    df_submission.to_csv(filename, index=False)
Example #2
0
def main():
    """Fine-tune PretrainedR2N with 5-fold CV and write a submission CSV.

    Reads ``seed``, ``n_hidden``, ``n_main``, ``n_epochs`` and ``patience``
    from the parsed command-line arguments (``args`` is also forwarded whole
    to ``LoaderMaker``, ``pretrain`` and ``LightningModel``). For every fold,
    encoders are obtained from ``pretrain`` and plugged into a
    ``PretrainedR2N`` network that is then trained; whatever ``load_model``
    returns for each fold is collected, and the list is passed to ``predict``.
    The result is saved to ``../submission/82_vaelstm_fine.csv``.
    """
    args = parse_args()
    seed_everything(args.seed)
    app_train = joblib.load('../data/05_onehot/application_train.joblib')
    app_test = joblib.load('../data/05_onehot/application_test.joblib')
    sequences = read_all('../data/06_onehot_seq/')
    dims = joblib.load('../data/07_dims/dims05.joblib')
    # Move the application-table entries out of `dims`: after the two pops
    # `dims` holds only the remaining entries, which is what `pretrain`
    # receives, while `app_dims` goes to the fine-tuned network.
    app_dims = {}
    app_dims['application_train'] = dims.pop('application_train')
    app_dims['application_test'] = dims.pop('application_test')

    app_data = {'application_train': app_train, 'application_test': app_test}
    loader_maker = LoaderMaker(app_data, sequences, args, onehot=True)

    # The run name is constant across folds and is also needed for the
    # submission path after the loop, so bind it once up front instead of
    # relying on a variable leaked from the loop body.
    name = '82_vaelstm_fine'

    skf = StratifiedKFold(n_splits=5)
    folds = skf.split(app_train['SK_ID_CURR'], app_train['TARGET'])
    best_models = []
    for train_index, val_index in folds:
        encoders = pretrain(app_train, app_test, sequences, dims, train_index,
                            val_index, args)
        train_dataloader = loader_maker.make(train_index)
        val_dataloader = loader_maker.make(val_index)
        model = LightningModel(
            PretrainedR2N(app_dims, args.n_hidden, args.n_main, encoders),
            nn.BCEWithLogitsLoss(), train_dataloader, val_dataloader, args)
        trainer = HomeCreditTrainer(name, args.n_epochs, args.patience)
        trainer.fit(model)
        # presumably restores the best checkpoint of this run — confirm
        # against load_model's definition.
        best_model = load_model(model, name, trainer.logger.version)
        best_models.append(best_model)

    # Predict
    test_dataloader = loader_maker.make(index=None, train=False)
    df_submission = predict(best_models, test_dataloader)
    df_submission.to_csv(f'../submission/{name}.csv', index=False)
Example #3
0
def main():
    """Train an MLP on the application table with 5-fold CV and submit.

    Reads ``seed``, ``onehot``, ``n_hidden``, ``batch_size``, ``n_epochs``
    and ``patience`` from the parsed command-line arguments. With
    ``args.onehot`` the network is ``MLPOneHot`` and its input width is the
    continuous dimension reported by ``get_dims``; otherwise the network is
    ``MLP`` and the input width is the sum of the embedding dims plus the
    continuous dimension. Whatever ``load_model`` returns for each fold is
    collected, the list is passed to ``predict``, and the result is saved to
    ``../submission/<run name>.csv``.
    """
    args = parse_args()
    seed_everything(args.seed)

    if args.onehot:
        app_train = joblib.load('../data/05_onehot/application_train.joblib')
        app_test = joblib.load('../data/05_onehot/application_test.joblib')
        dims = get_dims({'application_train': app_train})
        _, _, cont_dim = dims['application_train']
        n_input = cont_dim
    else:
        app_train = joblib.load(
            '../data/03_powertransform/application_train.joblib')
        app_test = joblib.load(
            '../data/03_powertransform/application_test.joblib')
        dims = get_dims({'application_train': app_train})
        # cat_dims / emb_dims are bound only on this branch; they are read
        # only when building the label-encoded MLP below.
        cat_dims, emb_dims, cont_dim = dims['application_train']
        n_input = emb_dims.sum() + cont_dim

    n_hidden = args.n_hidden
    # The run name does not change between folds, and the submission file
    # uses the same stem, so define it once and derive the path from it.
    name = '13_mlp-onehot' if args.onehot else '13_mlp-label'

    # CV
    skf = StratifiedKFold(n_splits=5)
    folds = skf.split(app_train['SK_ID_CURR'], app_train['TARGET'])
    best_models = []
    for train_index, val_index in folds:
        train_dataloader = make_dataloader(app_train,
                                           train_index,
                                           args.batch_size,
                                           onehot=args.onehot)
        val_dataloader = make_dataloader(app_train,
                                         val_index,
                                         args.batch_size,
                                         onehot=args.onehot)
        if args.onehot:
            network = MLPOneHot(n_input, n_hidden)
        else:
            network = MLP(cat_dims, emb_dims, n_input, n_hidden)
        model = LightningModel(network, nn.BCEWithLogitsLoss(),
                               train_dataloader, val_dataloader, args)
        trainer = HomeCreditTrainer(name, args.n_epochs, args.patience)
        trainer.fit(model)

        # presumably restores the best checkpoint of this run — confirm
        # against load_model's definition.
        best_model = load_model(model, name, trainer.logger.version)
        best_models.append(best_model)

    # Predict
    test_dataloader = make_dataloader(app_test,
                                      None,
                                      args.batch_size,
                                      train=False,
                                      onehot=args.onehot)
    df_submission = predict(best_models, test_dataloader)
    df_submission.to_csv(f'../submission/{name}.csv', index=False)