def _train_fold(self, train_df, target_df, fold):
    """Fit one fold and record its model, predictions, score and importances.

    Rows where ``fold == 0`` are used for fitting; rows where ``fold > 0``
    are used for validation. ``fold.max()`` is appended to ``self.weights``.

    When ``cfg.data.target.reconvert_type`` is set, the numpy function of
    that name is applied to both the validation target and the validation
    prediction, and negative predictions are then replaced with 0.

    Side effects: appends to ``self.weights``, ``self.models`` and
    ``self.scores``, writes the validation predictions into ``self.oof`` at
    the validation rows' index, and (for lightgbm / catboost / xgboost)
    extends ``self.feature_importance_df``.
    """
    is_train = fold == 0
    is_valid = fold > 0
    tr_x, va_x = train_df[is_train], train_df[is_valid]
    tr_y, va_y = target_df[is_train], target_df[is_valid]

    self.weights.append(fold.max())

    model = factory.get_model(self.cfg.model)
    model.fit(tr_x, tr_y, va_x, va_y, self.cat_features)
    va_pred = model.predict(va_x, self.cat_features)

    reconvert_name = self.cfg.data.target.reconvert_type
    if reconvert_name:
        # Map target and prediction back with the configured numpy function.
        reconvert = getattr(np, reconvert_name)
        va_y = reconvert(va_y)
        va_pred = reconvert(va_pred)
        va_pred = np.where(va_pred >= 0, va_pred, 0)

    self.models.append(model)
    self.oof[va_x.index] = va_pred.copy()

    metric_fn = factory.get_metrics(self.cfg.common.metrics.name)
    self.scores.append(metric_fn(va_y, va_pred))

    # Importances are only collected for the listed model families.
    if self.cfg.model.name not in ('lightgbm', 'catboost', 'xgboost'):
        return

    fold_importance = model.extract_importances()
    importance_fold_df = pd.DataFrame()
    importance_fold_df['feature'] = train_df.columns
    importance_fold_df['importance'] = fold_importance
    self.feature_importance_df = pd.concat(
        [self.feature_importance_df, importance_fold_df], axis=0)
def main():
    """Compute a Grad-CAM array for every training image and save it to disk.

    Loads the config and best weights from a fixed log directory, builds the
    model through ``factory.get_model`` with ``multi_gpu`` disabled, then
    calls ``gradcam(model, image_id)`` for each ``image_id`` in
    ``../data/input/train.csv`` and stores the result as
    ``../data/input/grad_cam/<image_id>.npy``.
    """
    log_dir = Path('../logs/clf_resnet18_20200703133354_0.827/')
    cfg = dh.load(log_dir / 'config.yml')
    train_df = pd.read_csv('../data/input/train.csv')

    cfg.model.multi_gpu = False
    model = factory.get_model(cfg).to(device)
    # map_location lets a checkpoint saved on another device (e.g. a GPU that
    # is not present here) be restored onto the device actually in use.
    state = torch.load(log_dir / 'weight_best.pt', map_location=device)
    model.load_state_dict(state)

    # np.save does not create missing directories; make sure the target exists.
    Path('../data/input/grad_cam').mkdir(parents=True, exist_ok=True)

    for id_ in tqdm(train_df['image_id']):
        grad_cam_array = gradcam(model, id_)
        np.save(f'../data/input/grad_cam/{id_}.npy', grad_cam_array)
def __init__(self, cfg):
    """Set up the segmentation module from ``cfg``.

    Reads ``cfg.Model`` (``model_arch``, ``output``, ``out_channel``),
    ``cfg.General.labels`` and ``cfg.Augmentation.tta``; builds the network
    and loss through ``get_model(cfg)`` and ``get_loss(cfg)``.

    Fails an assertion if ``cfg.Model.model_arch`` is not one of
    ``unet``, ``clsunet``, ``msunet`` or ``msclsunet``.
    """
    super(LightningModuleSeg, self).__init__()

    arch = cfg.Model.model_arch
    assert arch in {'unet', 'clsunet', 'msunet', 'msclsunet'}

    self.cfg = cfg
    self.model_arch = arch
    self.softmax = cfg.Model.output == 'softmax'

    self.num_class = len(cfg.General.labels)
    self.num_class_seg = cfg.Model.out_channel
    # True when the network emits more channels than there are labels.
    self.skip_first_class = self.num_class < self.num_class_seg

    self.net = get_model(cfg)
    self.loss = get_loss(cfg)
    self.metrics_keys = ['dice0', 'dice', 'dice_p', 'dice_n']

    # Only requested TTA names that are also in the supported set count.
    self.tta_enable = {'hflip', 'vflip', 'rotate90', 'rotate180'}
    self.tta = set(cfg.Augmentation.tta)  # ex. {'hflip', 'vflip'}
    applied_tta = self.tta & self.tta_enable
    self.applied_tta_num = len(applied_tta)
    print(f'Applied TTA: {applied_tta}')

    self.pred_cls_thres = 0.6
def predict_fold(run_name, df, cfg, fold_num):
    """Predict ``df`` with one fold's best weights, averaged over TTA passes.

    The test loader is iterated ``cfg.data.test.tta.iter_num`` times. Raw
    model outputs of each pass are stored side by side, averaged per class
    across passes, and finally passed through a sigmoid.

    Weights are read from ``../logs/<run_name>/weight_best_<fold_num>.pt``.

    Returns:
        Array of shape ``(len(test_loader.dataset), cfg.model.n_classes)``.
    """
    test_loader = factory.get_dataloader(df, cfg=cfg.data.test)

    n_classes = cfg.model.n_classes
    n_passes = cfg.data.test.tta.iter_num
    n_columns = n_classes * n_passes

    # One block of n_classes columns per TTA pass.
    raw_preds = np.zeros((len(test_loader.dataset), n_columns))
    averaged = np.zeros((len(raw_preds), n_classes))
    batch_size = test_loader.batch_size

    model = factory.get_model(cfg, is_train=False).to(device)
    weight_path = f'../logs/{run_name}/weight_best_{fold_num}.pt'
    model.load_state_dict(torch.load(weight_path))
    model.eval()

    with torch.no_grad():
        for pass_idx in range(n_passes):
            col_start = pass_idx * n_classes
            col_stop = col_start + n_classes
            for batch_idx, images in enumerate(test_loader):
                row_start = batch_idx * batch_size
                row_stop = row_start + batch_size
                outputs = model(images.to(device).float())
                raw_preds[row_start:row_stop, col_start:col_stop] = \
                    outputs.cpu().detach().numpy()

    for cls_idx in range(n_classes):
        # Columns holding this class in every pass: cls, cls + n_classes, ...
        class_cols = list(range(cls_idx, n_columns, n_classes))
        averaged[:, cls_idx] = np.mean(raw_preds[:, class_cols], axis=1).reshape(-1)

    return 1 / (1 + np.exp(-averaged))
def get_net(self):
    """Return the result of ``get_model`` for this object's ``cfg.Model``."""
    model_cfg = self.cfg.Model
    return get_model(model_cfg)
def train_model(run_name, df, fold_df, cfg):
    """Train one model per fold column and save weights, curves and OOF preds.

    For every column of ``fold_df``, rows where the column is 0 form the
    training set and rows where it is > 0 form the validation set. The
    epoch with the highest validation score is kept. Its weights go to
    ``../logs/<run_name>/weight_best_<fold>.pt`` and its predictions are
    written into the out-of-fold array, which is saved as ``oof.npy``.

    The returned CV is the sum over folds of
    ``best_val_score * fold_df[col].max()``.

    Args:
        run_name: Name of the sub-directory of ``../logs`` to write into.
        df: Frame passed to ``factory.get_dataloader``; must contain the
            column(s) named by ``cfg.common.target``.
        fold_df: One column per fold, as described above.
        cfg: Config object; ``cfg.common``, ``cfg.data``, ``cfg.model`` and
            ``cfg.scheduler`` are read here.

    Returns:
        ``{'cv': <weighted validation score>}``.
    """
    import copy  # function-scope import keeps this block self-contained

    # Only validation rows whose index label is <= this value are scored.
    # NOTE(review): presumably the last label of the original training rows,
    # with later labels being appended extra data - confirm against the data.
    last_scored_label = 33126

    oof = np.zeros(len(df))
    cv = 0

    for fold_, col in enumerate(fold_df.columns):
        fold_banner = f'\n========================== FOLD {fold_} ... ==========================\n'
        print(fold_banner)
        logging.debug(fold_banner)

        trn_x, val_x = df[fold_df[col] == 0], df[fold_df[col] > 0]

        # Use ONE boolean mask for both the targets and the prediction
        # positions so they always line up; a label slice (.loc[:label])
        # only matches the mask when the index is sorted.
        scored_mask = np.asarray(val_x.index <= last_scored_label)
        val_y = val_x.loc[scored_mask][cfg.common.target]
        val_org_idx = np.where(scored_mask)[0]

        train_loader = factory.get_dataloader(trn_x, cfg.data.train)
        valid_loader = factory.get_dataloader(val_x, cfg.data.valid)

        model = factory.get_model(cfg).to(device)

        criterion = factory.get_loss(cfg)
        optimizer = factory.get_optim(cfg, model.parameters())
        scheduler = factory.get_scheduler(cfg, optimizer)

        best_epoch = -1
        best_val_score = -np.inf
        mb = master_bar(range(cfg.data.train.epochs))

        train_loss_list = []
        val_loss_list = []
        val_score_list = []

        for epoch in mb:
            start_time = time.time()

            model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)
            valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

            val_score = factory.get_metrics(cfg.common.metrics.name)(val_y, valid_preds[val_org_idx])

            train_loss_list.append(avg_loss)
            val_loss_list.append(avg_val_loss)
            val_score_list.append(val_score)

            # ReduceLROnPlateau needs the monitored value; other schedulers do not.
            if cfg.scheduler.name == 'ReduceLROnPlateau':
                scheduler.step(avg_val_loss)
            else:
                scheduler.step()

            elapsed = time.time() - start_time
            epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} time: {elapsed:.0f}s'
            mb.write(epoch_msg)
            logging.debug(epoch_msg)

            if val_score > best_val_score:
                best_epoch = epoch + 1
                best_val_score = val_score
                best_valid_preds = valid_preds
                net = model.module if cfg.model.multi_gpu else model
                # state_dict() returns references to the live parameters, so
                # without a copy later epochs would overwrite the snapshot.
                best_model = copy.deepcopy(net.state_dict())

        oof[val_x.index] = best_valid_preds.reshape(-1)
        cv += best_val_score * fold_df[col].max()

        torch.save(best_model, f'../logs/{run_name}/weight_best_{fold_}.pt')
        save_png(run_name, cfg, train_loss_list, val_loss_list, val_score_list, fold_)

        print(f'\nEpoch {best_epoch} - val_score: {best_val_score:.4f}')
        logging.debug(f'\nEpoch {best_epoch} - val_score: {best_val_score:.4f}')

    print('\n\n===================================\n')
    print(f'CV: {cv:.6f}')
    logging.debug(f'\n\nCV: {cv:.6f}')
    print('\n===================================\n\n')

    result = {
        'cv': cv,
    }

    np.save(f'../logs/{run_name}/oof.npy', oof)

    return result
os.mkdir(train_output_dir) test_output_dir = os.path.join(output_dir, "test/") if not os.path.exists(test_output_dir): os.mkdir(test_output_dir) # Load data train_loader, test_loader = factory.get_data(args.data, args.batch_size, args.workers) # Load models disc = factory.get_discriminator(args.data, args.disc).to(args.device) disc2 = factory.get_discriminator(args.data, args.disc).to(args.device) gen = factory.get_generator(args.data, args.gen, args.deconv).to(args.device) model = factory.get_model(args, disc, disc2, gen) if args.test: disc.load_state_dict(torch.load(model_output_dir + 'disc.pth')) gen.load_state_dict(torch.load(model_output_dir + 'gen.pth')) print("Loaded model") test_loss = test(args, args.epochs, disc, gen, test_loader, test_output_dir) print("Outputted test results") else: with open(output_dir + '/config.txt', 'w') as f: json.dump(args.__dict__, f) model.train(train_loader, test_loader, output_dir, train_output_dir, test_output_dir, model_output_dir)
def train_cnn(run_name, trn_x, val_x, trn_y, val_y, cfg):
    """Train a single CNN and keep the epoch with the best quadratic kappa.

    With ``cfg.model.n_classes > 1`` the score is computed on the arg-max of
    the validation predictions. Otherwise a ``QWKOptimizedRounder`` is
    fitted on the validation predictions every epoch and its rounded output
    is scored.

    Written to ``../logs/<run_name>/``: ``oof.npy`` (best epoch's validation
    predictions), ``best_coef.npy`` (rounder coefficients of the best epoch,
    or an empty list when ``n_classes != 1``), ``weight_best.pt`` and the
    curves drawn by ``save_png``.

    Returns:
        ``{'cv': <best validation score>}``.
    """
    import copy  # function-scope import keeps this block self-contained

    train_loader = factory.get_dataloader(trn_x, trn_y, cfg.data.train)
    valid_loader = factory.get_dataloader(val_x, val_y, cfg.data.valid)

    model = factory.get_model(cfg).to(device)

    criterion = factory.get_loss(cfg)
    optimizer = factory.get_optim(cfg, model.parameters())
    scheduler = factory.get_scheduler(cfg, optimizer)

    best_epoch = -1
    best_val_score = -np.inf
    best_coef = []
    mb = master_bar(range(cfg.data.train.epochs))

    train_loss_list = []
    val_loss_list = []
    val_score_list = []
    initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

    for epoch in mb:
        start_time = time.time()

        model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)
        valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

        if cfg.model.n_classes > 1:
            val_score = quadratic_weighted_kappa(val_y, valid_preds.argmax(1))
            cm = confusion_matrix(val_y, valid_preds.argmax(1))
        else:
            # Single-output model: fit rounding thresholds on this epoch's predictions.
            optR = QWKOptimizedRounder()
            optR.fit(valid_preds.copy(), val_y, initial_coef)
            coef = optR.coefficients()
            valid_preds_class = optR.predict(valid_preds.copy(), coef)
            val_score = quadratic_weighted_kappa(val_y, valid_preds_class)
            cm = confusion_matrix(val_y, valid_preds_class)

        train_loss_list.append(avg_loss)
        val_loss_list.append(avg_val_loss)
        val_score_list.append(val_score)

        # ReduceLROnPlateau needs the monitored value; other schedulers do not.
        if cfg.scheduler.name == 'ReduceLROnPlateau':
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()

        elapsed = time.time() - start_time
        epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} time: {elapsed:.0f}s'
        mb.write(epoch_msg)
        logging.debug(epoch_msg)

        if val_score > best_val_score:
            best_epoch = epoch + 1
            best_val_score = val_score
            best_valid_preds = valid_preds
            net = model.module if cfg.model.multi_gpu else model
            # state_dict() returns references to the live parameters, so
            # without a copy later epochs would overwrite the snapshot.
            best_model = copy.deepcopy(net.state_dict())
            if cfg.model.n_classes == 1:
                best_coef = coef
            best_cm = cm

    # Report the confusion matrix of the best epoch, matching the reported
    # score, rather than whatever the final epoch produced.
    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(best_cm)
    logging.debug(best_cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    print(f'BEST EPOCH: {best_epoch}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    logging.debug(f'BEST EPOCH: {best_epoch}\n\n')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/oof.npy', best_valid_preds)
    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)
    torch.save(best_model, f'../logs/{run_name}/weight_best.pt')
    save_png(run_name, cfg, train_loss_list, val_loss_list, val_score_list)

    return result
def train_ordinal_reg(run_name, trn_x, val_x, trn_y, val_y, cfg):
    """Train one model per target column and score their combined output.

    A separate model is trained for every column of ``trn_y`` except the
    first; for each, the epoch with the lowest validation loss is kept. The
    sigmoid of each column's best validation predictions is stored, the
    stored values are summed per row, and a ``QWKOptimizedRounder`` is
    fitted on that sum against ``val_y.values.sum(axis=1) - 1``.

    Written to ``../logs/<run_name>/``: ``oof_<col>.npy`` and
    ``weight_best_<col>.pt`` for every trained column, plus
    ``best_coef.npy``.

    Returns:
        ``{'cv': <quadratic weighted kappa of the rounded sum>}``.
    """
    import copy  # function-scope import keeps this block self-contained

    # Defined before the loop: it is needed after it, even if no column is trained.
    initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5]

    # Explicit float buffer: zeros_like(val_y) would inherit the label dtype
    # and truncate the sigmoid outputs to 0 when the labels are integers.
    ordinal_val_preds = np.zeros(val_y.shape, dtype=float)

    for i, col in enumerate(trn_y.columns[1:]):
        print(f'\n\n====================  {col}  ====================')
        logging.debug(f'\n\n====================  {col}  ====================')

        train_loader = factory.get_dataloader(trn_x, trn_y[col], cfg.data.train)
        valid_loader = factory.get_dataloader(val_x, val_y[col], cfg.data.valid)

        model = factory.get_model(cfg).to(device)

        criterion = factory.get_loss(cfg)
        optimizer = factory.get_optim(cfg, model.parameters())
        scheduler = factory.get_scheduler(cfg, optimizer)

        best_epoch = -1
        best_val_loss = np.inf
        mb = master_bar(range(cfg.data.train.epochs))

        train_loss_list = []
        val_loss_list = []

        for epoch in mb:
            start_time = time.time()

            model, avg_loss = train_epoch(model, train_loader, criterion, optimizer, mb, cfg)
            valid_preds, avg_val_loss = val_epoch(model, valid_loader, criterion, cfg)

            train_loss_list.append(avg_loss)
            val_loss_list.append(avg_val_loss)

            # ReduceLROnPlateau needs the monitored value; other schedulers do not.
            if cfg.scheduler.name == 'ReduceLROnPlateau':
                scheduler.step(avg_val_loss)
            else:
                scheduler.step()

            elapsed = time.time() - start_time
            epoch_msg = f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s'
            mb.write(epoch_msg)
            logging.debug(epoch_msg)

            if avg_val_loss < best_val_loss:
                best_epoch = epoch + 1
                best_val_loss = avg_val_loss
                best_valid_preds = valid_preds
                net = model.module if cfg.model.multi_gpu else model
                # state_dict() returns references to the live parameters, so
                # without a copy later epochs would overwrite the snapshot.
                best_model = copy.deepcopy(net.state_dict())

        print(f'epoch: {best_epoch} loss: {best_val_loss}')

        # Column i of the buffer holds the sigmoid of this column's best predictions.
        ordinal_val_preds[:, i] = 1 / (1 + np.exp(-1 * best_valid_preds))

        np.save(f'../logs/{run_name}/oof_{col}.npy', best_valid_preds)
        torch.save(best_model, f'../logs/{run_name}/weight_best_{col}.pt')

    valid_preds = np.sum(ordinal_val_preds, axis=1)
    val_labels = (np.sum(val_y.values, axis=1) - 1).astype(int)

    optR = QWKOptimizedRounder()
    optR.fit(valid_preds.copy(), val_labels, initial_coef)
    best_coef = optR.coefficients()
    valid_preds_class = optR.predict(valid_preds.copy(), best_coef)
    best_val_score = quadratic_weighted_kappa(val_labels, valid_preds_class)
    cm = confusion_matrix(val_labels, valid_preds_class)

    print('\n\nCONFUSION MATRIX')
    logging.debug('\n\nCONFUSION MATRIX')
    print(cm)
    logging.debug(cm)

    print('\n\n===================================\n')
    print(f'CV: {best_val_score:.6f}')
    logging.debug(f'\n\nCV: {best_val_score:.6f}')
    print('\n===================================\n\n')

    result = {
        'cv': best_val_score,
    }

    np.save(f'../logs/{run_name}/best_coef.npy', best_coef)

    return result