def main():
    """Train the R2N CNN model with 5-fold CV and write a submission CSV.

    The ``--onehot`` flag selects which preprocessed dataset variant is
    loaded and which experiment/submission names are used.
    """
    args = parse_args()
    seed_everything(args.seed)

    # Pick the preprocessed tables matching the encoding variant.
    if args.onehot:
        all_data = read_all(directory='../data/05_onehot')
        sequences = read_sequences(directory='../data/06_onehot_seq')
    else:
        all_data = read_all(directory='../data/03_powertransform')
        sequences = read_sequences(directory='../data/04_sequence')

    dims = get_dims(all_data)
    loader_maker = LoaderMaker(all_data, sequences, args, onehot=args.onehot)

    # CV
    name = '15_cnn-onehot' if args.onehot else '15_cnn-label'
    splitter = StratifiedKFold(n_splits=5)
    fold_iter = splitter.split(all_data['application_train']['SK_ID_CURR'],
                               all_data['application_train']['TARGET'])
    best_models = []
    for train_index, val_index in fold_iter:
        train_dataloader = loader_maker.make(train_index)
        val_dataloader = loader_maker.make(val_index)
        # A fresh network is built for every fold.
        model = LightningModel(R2NCNN(dims, args.n_hidden, args.n_main),
                               nn.BCEWithLogitsLoss(),
                               train_dataloader,
                               val_dataloader,
                               args)
        trainer = HomeCreditTrainer(name, args.n_epochs, args.patience)
        trainer.fit(model)
        # Reload the checkpoint with the best validation score for this fold.
        best_models.append(load_model(model, name, trainer.logger.version))

    # Predict on the test set with all fold models.
    test_dataloader = loader_maker.make(index=None, train=False)
    df_submission = predict(best_models, test_dataloader)
    # NOTE: the submission name ('15_r2n-cnn-*') deliberately differs from
    # the trainer name ('15_cnn-*') above.
    if args.onehot:
        filename = '../submission/15_r2n-cnn-onehot.csv'
    else:
        filename = '../submission/15_r2n-cnn-label.csv'
    df_submission.to_csv(filename, index=False)
def main():
    """Fine-tune a pretrained R2N with 5-fold CV and write a submission CSV.

    Loads one-hot preprocessed application tables plus sequence tables,
    pretrains per-fold sequence encoders, fine-tunes the combined network,
    and writes averaged test predictions to ``../submission/<name>.csv``.
    """
    args = parse_args()
    seed_everything(args.seed)

    app_train = joblib.load('../data/05_onehot/application_train.joblib')
    app_test = joblib.load('../data/05_onehot/application_test.joblib')
    sequences = read_all('../data/06_onehot_seq/')
    dims = joblib.load('../data/07_dims/dims05.joblib')

    # Split the application dims off: after the pops, `dims` holds only the
    # sequence-table dims (passed to pretrain), `app_dims` feeds the head.
    app_dims = {}
    app_dims['application_train'] = dims.pop('application_train')
    app_dims['application_test'] = dims.pop('application_test')

    app_data = {'application_train': app_train, 'application_test': app_test}
    loader_maker = LoaderMaker(app_data, sequences, args, onehot=True)

    # CV. The experiment name is loop-invariant and is also read after the
    # loop to build the submission path, so define it once up front (the
    # original assigned it inside the fold loop).
    name = '82_vaelstm_fine'
    skf = StratifiedKFold(n_splits=5)
    folds = skf.split(app_train['SK_ID_CURR'], app_train['TARGET'])
    best_models = []
    for train_index, val_index in folds:
        # Pretrain the sequence encoders on this fold's split only.
        encoders = pretrain(app_train, app_test, sequences, dims,
                            train_index, val_index, args)
        train_dataloader = loader_maker.make(train_index)
        val_dataloader = loader_maker.make(val_index)
        model = LightningModel(
            PretrainedR2N(app_dims, args.n_hidden, args.n_main, encoders),
            nn.BCEWithLogitsLoss(),
            train_dataloader, val_dataloader, args)
        trainer = HomeCreditTrainer(name, args.n_epochs, args.patience)
        trainer.fit(model)
        # Keep the best-validation checkpoint from this fold.
        best_model = load_model(model, name, trainer.logger.version)
        best_models.append(best_model)

    # Predict
    test_dataloader = loader_maker.make(index=None, train=False)
    df_submission = predict(best_models, test_dataloader)
    df_submission.to_csv(f'../submission/{name}.csv', index=False)
def main():
    """Train an application-table MLP with 5-fold CV and write a submission.

    With ``--onehot`` the one-hot tables are used and ``MLPOneHot`` consumes
    the continuous vector directly; otherwise the label-encoded tables are
    used and ``MLP`` embeds the categorical columns.
    """
    args = parse_args()
    seed_everything(args.seed)

    if args.onehot:
        app_train = joblib.load('../data/05_onehot/application_train.joblib')
        app_test = joblib.load('../data/05_onehot/application_test.joblib')
        dims = get_dims({'application_train': app_train})
        _, _, cont_dim = dims['application_train']
        n_input = cont_dim
    else:
        app_train = joblib.load(
            '../data/03_powertransform/application_train.joblib')
        app_test = joblib.load(
            '../data/03_powertransform/application_test.joblib')
        dims = get_dims({'application_train': app_train})
        cat_dims, emb_dims, cont_dim = dims['application_train']
        # Embedded categoricals are concatenated with the continuous part.
        n_input = emb_dims.sum() + cont_dim
    n_hidden = args.n_hidden

    # CV. The experiment name is loop-invariant, so compute it once and reuse
    # it for the submission filename below instead of duplicating the ternary
    # (resulting paths are unchanged: 13_mlp-onehot.csv / 13_mlp-label.csv).
    name = '13_mlp-onehot' if args.onehot else '13_mlp-label'
    skf = StratifiedKFold(n_splits=5)
    folds = skf.split(app_train['SK_ID_CURR'], app_train['TARGET'])
    best_models = []
    for train_index, val_index in folds:
        train_dataloader = make_dataloader(app_train, train_index,
                                           args.batch_size,
                                           onehot=args.onehot)
        val_dataloader = make_dataloader(app_train, val_index,
                                         args.batch_size,
                                         onehot=args.onehot)
        # Build a fresh network per fold so no weights leak across folds.
        if args.onehot:
            network = MLPOneHot(n_input, n_hidden)
        else:
            network = MLP(cat_dims, emb_dims, n_input, n_hidden)
        model = LightningModel(network, nn.BCEWithLogitsLoss(),
                               train_dataloader, val_dataloader, args)
        trainer = HomeCreditTrainer(name, args.n_epochs, args.patience)
        trainer.fit(model)
        # Keep the best-validation checkpoint from this fold.
        best_model = load_model(model, name, trainer.logger.version)
        best_models.append(best_model)

    # Predict
    test_dataloader = make_dataloader(app_test, None, args.batch_size,
                                      train=False, onehot=args.onehot)
    df_submission = predict(best_models, test_dataloader)
    df_submission.to_csv(f'../submission/{name}.csv', index=False)