def configure_optimizers(self):
    optimizer = RAdam(self.parameters(), lr=self.hparams.learning_rate)
    scheduler = LambdaLR(optimizer, self.learning_rate_warmup)
    return {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "interval": "step"},
    }
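# A minimal sketch of the `learning_rate_warmup` callable referenced above. It is not
# shown in the original code; LambdaLR only requires a function mapping the global step
# to a multiplier on the base lr, so the linear ramp and the `warmup_steps` default
# below are assumptions, not the original implementation.
def learning_rate_warmup(self, step, warmup_steps=1000):
    # ramp the lr multiplier linearly from ~0 to 1 over the first warmup_steps steps
    return min(1.0, float(step + 1) / warmup_steps)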
def get_optimizer(config, model, filter_bias_and_bn=True):
    opt_lower = config.optimizer.name.lower()
    weight_decay = config.optimizer.params.weight_decay
    if 'adamw' in opt_lower or 'radam' in opt_lower:
        # Compensate for the way the current AdamW and RAdam optimizers apply LR to the
        # weight decay. I don't believe they follow the paper or the original Torch7
        # impl, which schedule weight decay based on the ratio current_lr / initial_lr.
        weight_decay /= config.optimizer.params.lr

    if weight_decay and filter_bias_and_bn:
        # exclude bias and batch-norm parameters from weight decay
        parameters = add_weight_decay(model, weight_decay)
        weight_decay = 0.
    else:
        parameters = model.parameters()

    if 'fused' in opt_lower:
        assert has_apex and torch.cuda.is_available(), 'APEX and CUDA required for fused optimizers'

    opt_lookahead = config.optimizer.lookahead.apply

    if opt_lower == 'sgd':
        optimizer = optim.SGD(parameters,
                              lr=config.optimizer.params.lr,
                              momentum=config.optimizer.params.momentum,
                              weight_decay=weight_decay,
                              nesterov=True)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(parameters, lr=config.optimizer.params.lr)
    elif opt_lower == 'adamw':
        optimizer = AdamW(parameters,
                          lr=config.optimizer.params.lr,
                          weight_decay=weight_decay,
                          eps=config.optimizer.params.opt_eps)
    elif opt_lower == 'nadam':
        optimizer = Nadam(parameters,
                          lr=config.optimizer.params.lr,
                          weight_decay=weight_decay,
                          eps=config.optimizer.params.opt_eps)
    elif opt_lower == 'radam':
        optimizer = RAdam(parameters,
                          lr=config.optimizer.params.lr,
                          weight_decay=weight_decay,
                          eps=config.optimizer.params.opt_eps)
    else:
        raise ValueError('Invalid optimizer: {}'.format(opt_lower))

    if opt_lookahead:
        optimizer = Lookahead(optimizer)

    return optimizer
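# A minimal sketch of the `add_weight_decay` helper referenced above, assuming it
# follows the common pattern of putting biases and 1-D parameters (e.g. BatchNorm
# weights) into a no-decay group. The helper actually used by the original code may
# differ in its skip list or grouping.
def add_weight_decay(model, weight_decay=1e-5, skip_list=()):
    decay, no_decay = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if len(param.shape) == 1 or name.endswith('.bias') or name in skip_list:
            no_decay.append(param)
        else:
            decay.append(param)
    # return parameter groups the optimizer can consume directly
    return [
        {'params': no_decay, 'weight_decay': 0.},
        {'params': decay, 'weight_decay': weight_decay},
    ]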
if args.model == 'transformer':  # guard reconstructed; the original branch header is not in the snippet
    t = Transformer.trnsfrmr_nt(seq_len=seq_len,
                                ini_len=args.ini_len,
                                final_len=model_final_len).to(device)
elif args.model == 'lstm':
    from Models import LSTM
    t = LSTM.lstm(seq_len=seq_len,
                  ini_len=args.ini_len,
                  final_len=model_final_len).to(device)

if path.exists(args.param_file):
    t.load_state_dict(torch.load(args.param_file))

if args.optimizer == 'RAdam':
    from optimizers import RAdam
    optimizer = RAdam.RAdam(t.parameters(), lr=args.lr)
elif args.optimizer == 'Adam':
    optimizer = torch.optim.Adam(t.parameters(), lr=args.lr)

t = t.double()
train_mse = []
test_mse = [10000]
for ij in range(epochs):
    loss_list = []
    for i, batch in enumerate(train_data_loader):
        optimizer.zero_grad()
        in_batch = batch['in'].to(device)
        out = t(in_batch)
        loss = lossfn(batch['out'].to(device), out)
        loss_list.append(loss)
hparams['resume_dir'] = args.resume_dir
args = Namespace(**hparams)

net = DeepConvolutionalUNet(hidden_size=args.n_fft // 2 + 1)
net = nn.DataParallel(net)

model_path = os.path.join(args.resume_dir, 'model_best.ckpt')
print(f'Resume model from {model_path} ...')
checkpoint = torch.load(model_path)
net.load_state_dict(checkpoint['model_state_dict'])
net = net.to(device)

# optimization
# optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9)
# optimizer = optim.Adam(net.parameters(), lr=args.learning_rate, weight_decay=0.1)
optimizer = RAdam(net.parameters(), lr=args.learning_rate, weight_decay=0.1)

scheduler = None
if args.use_swa:
    steps_per_epoch = len(train_dataloader) // args.batch_size
    optimizer = SWA(optimizer, swa_start=20 * steps_per_epoch, swa_freq=steps_per_epoch)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer.optimizer,
                                                     mode="max",
                                                     patience=5,
                                                     factor=0.5)
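# Not shown above: with the (assumed) torchcontrib-style SWA wrapper, the averaged
# weights still have to be swapped into the model at the end of training, and the
# BatchNorm running statistics recomputed on the training data. A minimal, hedged
# sketch of that finalization step:
if args.use_swa:
    optimizer.swap_swa_sgd()                    # load the SWA-averaged weights into net
    optimizer.bn_update(train_dataloader, net)  # recompute BatchNorm running stats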
elif args.model == 'b1':
    model = EfficientNet.from_pretrained('efficientnet-b1', num_classes=6)
elif args.model == 'b4':
    model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=6)
elif args.model == 'b5':
    model = EfficientNet.from_pretrained('efficientnet-b5', num_classes=6)

model = torch.nn.DataParallel(model).cuda()
# model.to(device)

criterion = torch.nn.BCEWithLogitsLoss().cuda()
sub_criterion = torch.nn.BCEWithLogitsLoss(reduction='none').cuda()  # per-element losses

plist = [{'params': model.parameters(), 'lr': 0.00001}]
optimizer = RAdam(plist, lr=0.00001)


def log_loss(output, label):
    # |sigmoid(output) - rev_label| is the probability assigned to the true label
    # for each of the 6 binary targets
    rev_label = (label == 0).float()
    output = torch.sigmoid(output)
    loss = torch.abs(output - rev_label)
    # negative log of the mean per-sample joint probability over the targets
    loss = -torch.log(loss.prod(-1).mean())
    return loss


# Train
# lr_sc = lr_scheduler.StepLR(optimizer, step_size=2)
if args.keep > 0:
    checkpoint = torch.load('/home/boh001/save_model/effi/{}.pth'.format(args.keep))
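# A quick sanity check of the log_loss idea above, with hypothetical values (not from
# the original code): a confident, correct prediction drives the per-sample product
# toward 1 and the loss toward 0.
import torch

logits = torch.tensor([[4.0, -4.0]])                  # confident logits for two targets
labels = torch.tensor([[1.0, 0.0]])                   # first target positive, second negative
probs = torch.sigmoid(logits)                         # ~[0.982, 0.018]
correct = torch.abs(probs - (labels == 0).float())    # ~[0.982, 0.982], prob. of true label
loss = -torch.log(correct.prod(-1).mean())            # ~0.036, close to zero as expected
print(loss)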
def main():
    print(args.work_dir, args.exp)
    work_dir = os.path.join(args.work_dir, args.exp)

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    # copy this file to work dir to keep training configuration
    shutil.copy(__file__, os.path.join(work_dir, 'main.py'))
    with open(os.path.join(work_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # 1. dataset
    train_filename = args.trn_root
    test_filename = args.test_root

    trainset = Segmentation_2d_data(train_filename)
    valiset = Segmentation_2d_data(test_filename)

    train_loader = data.DataLoader(trainset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True)
    valid_loader = data.DataLoader(valiset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True)

    trn_logger = Logger(os.path.join(work_dir, 'train.log'))
    trn_raw_logger = Logger(os.path.join(work_dir, 'train_raw.log'))
    val_logger = Logger(os.path.join(work_dir, 'validation.log'))

    # model
    if args.model == 'unet':
        net = Unet2D(in_shape=(1, 512, 512),
                     padding=args.padding_size,
                     momentum=args.batchnorm_momentum)
    elif args.model == 'unetcoord':
        net = Unet2D_coordconv(in_shape=(1, 512, 512),
                               padding=args.padding_size,
                               momentum=args.batchnorm_momentum,
                               coordnumber=args.coordconv_no,
                               radius=False)
    elif args.model == 'unetmultiinput':
        net = Unet2D_multiinput(in_shape=(1, 512, 512),
                                padding=args.padding_size,
                                momentum=args.batchnorm_momentum)
    elif args.model == 'scse_block':
        net = Unet_sae(in_shape=(1, 512, 512),
                       padding=args.padding_size,
                       momentum=args.batchnorm_momentum)
    else:
        raise ValueError('Not supported network.')

    # loss
    if args.loss_function == 'bce':
        criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([args.bce_weight])).cuda()
    elif args.loss_function == 'dice':
        criterion = DiceLoss().cuda()
    else:
        raise ValueError('{} loss is not supported yet.'.format(args.loss_function))

    # optim
    if args.optim_function == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=args.initial_lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optim_function == 'adam':
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=args.initial_lr,
                                     weight_decay=args.weight_decay)
    elif args.optim_function == 'radam':
        optimizer = RAdam(net.parameters(),
                          lr=args.initial_lr,
                          weight_decay=args.weight_decay)
    else:
        raise ValueError('{} optimizer is not supported yet.'.format(args.optim_function))

    net = nn.DataParallel(net).cuda()
    cudnn.benchmark = True

    lr_schedule = args.lr_schedule
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=lr_schedule[:-1],
                                                  gamma=0.1)

    best_iou = 0
    for epoch in range(lr_schedule[-1]):
        train(train_loader, net, criterion, optimizer, epoch, trn_logger, trn_raw_logger)
        iou = validate(valid_loader, net, criterion, epoch, val_logger)
        lr_scheduler.step()

        is_best = iou > best_iou
        best_iou = max(iou, best_iou)
        checkpoint_filename = 'model_checkpoint_{:0>3}.pth'.format(epoch + 1)
        save_checkpoint({'epoch': epoch + 1,
                         'state_dict': net.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        is_best, work_dir, checkpoint_filename)

    draw_curve(work_dir, trn_logger, val_logger)
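# Hedged illustration of the lr_schedule convention used in main() above (the values
# here are hypothetical, not from the original configuration): every entry except the
# last is a MultiStepLR milestone, and the last entry is the total number of epochs.
lr_schedule = [20, 40, 50]
milestones, total_epochs = lr_schedule[:-1], lr_schedule[-1]
print(milestones, total_epochs)  # [20, 40] 50 -> lr is multiplied by 0.1 at epochs 20 and 40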
def main():
    print(args.work_dir, args.exp)
    work_dir = os.path.join(args.work_dir, args.exp)

    kaggle_path = "/data2/sk_data/kaggle_data/stage_1_train_images_png"
    kaggle_csv_path = "/data2/sk_data/kaggle_data/bin_dataframe.csv"
    label_data = pd.read_csv(kaggle_csv_path)

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    # copy this file to work dir to keep training configuration
    shutil.copy(__file__, os.path.join(work_dir, 'main.py'))
    with open(os.path.join(work_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # 1. dataset
    train_filename = args.trn_root
    test_filename = args.test_root

    if args.model == "efficientnet":
        if args.kaggle:
            trainset = load_kaggle_data_with_balanced(kaggle_path, kaggle_csv_path)
        else:
            trainset = Classification_Data(train_filename)
        # weight each sample inversely to its class frequency
        class_sample_count = np.array([len(np.where(label_data["any"] == t)[0])
                                       for t in np.unique(label_data["any"])])
        weight = 1. / class_sample_count
        train_weights = np.array([weight[t] for t in label_data["any"]])
        train_sampler = torch.utils.data.WeightedRandomSampler(weights=train_weights,
                                                               num_samples=len(train_weights))
        valiset = Classification_Data(test_filename)
    elif args.model == "resnet":
        trainset = Classification_Data(train_filename)
        valiset = Classification_Data(test_filename)
    else:
        raise ValueError('Not supported network.')

    # train_history = History(len(trainset))

    if not args.kaggle:
        train_loader = data.DataLoader(trainset,
                                       batch_size=args.batch_size,
                                       num_workers=args.num_workers,
                                       shuffle=False,
                                       sampler=train_sampler)
    else:
        train_loader = data.DataLoader(trainset,
                                       batch_size=args.batch_size,
                                       num_workers=args.num_workers,
                                       shuffle=True)
    valid_loader = data.DataLoader(valiset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers)

    # save input stats for later use
    trn_logger = Logger(os.path.join(work_dir, 'train.log'))
    trn_raw_logger = Logger(os.path.join(work_dir, 'train_raw.log'))
    val_logger = Logger(os.path.join(work_dir, 'validation.log'))
    print(len(trainset))

    # model
    if args.model == 'unet':
        net = Unet2D(in_shape=(args.multi_input, 512, 512),
                     padding=args.padding_size,
                     momentum=args.batchnorm_momentum)
    elif args.model == 'efficientnet':
        net = EfficientNet.from_pretrained('efficientnet-' + args.number, num_classes=1)
    elif args.model == 'resnet':
        net = models.resnet50(pretrained=True)
        num_ftrs = net.fc.in_features
        net.fc = nn.Linear(num_ftrs, 1)
        print("Load Resnet-50")
    else:
        raise ValueError('Not supported network.')

    # loss
    if args.loss_function == 'bce':
        criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([args.bce_weight])).cuda()
    elif args.loss_function == "cross_entropy":
        criterion = torch.nn.CrossEntropyLoss()
    else:
        raise ValueError('{} loss is not supported yet.'.format(args.loss_function))

    # optim
    if args.optim_function == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=args.initial_lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optim_function == 'adam':
        optimizer = torch.optim.Adam(net.parameters(), lr=args.initial_lr)
    elif args.optim_function == 'radam':
        optimizer = RAdam(net.parameters(),
                          lr=args.initial_lr,
                          weight_decay=args.weight_decay)
    else:
        raise ValueError('{} optimizer is not supported yet.'.format(args.optim_function))

    net = nn.DataParallel(net).cuda()
    cudnn.benchmark = True

    lr_schedule = args.lr_schedule
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=lr_schedule[:-1],
                                                  gamma=0.1)

    best_acc = 0
    for epoch in range(lr_schedule[-1]):
        train(train_loader, net, criterion, optimizer, epoch, trn_logger,
              sublogger=trn_raw_logger, trainset=trainset,
              val_loader=valid_loader, val_logger=val_logger, val_mode=True)
        print("Done")
        loss, acc = validate(valid_loader, net, criterion, epoch, val_logger)
        lr_scheduler.step()

        if best_acc == 0:
            best_acc = acc
        else:
            best_acc = max(acc, best_acc)
        is_best = True  # always True here, so a checkpoint is written every epoch

        if is_best:
            checkpoint_filename = 'model_checkpoint_{:0>3}.pth'.format(epoch + 1)
            save_checkpoint({'epoch': epoch + 1,
                             'state_dict': net.state_dict(),
                             'optimizer': optimizer.state_dict()},
                            is_best, work_dir, checkpoint_filename)

    draw_curve(work_dir, trn_logger, val_logger)
def configure_optimizers(self):
    return RAdam(self.parameters(), lr=self.hparams.learning_rate)