def __init__(self, model, loader, lr, device, writer, step, optim_choose='adam'):
    self.model = model
    self.loader = loader
    # size_average is deprecated; reduction='mean' is the equivalent modern argument
    self.criterion1 = nn.MSELoss(reduction='mean').to(device)
    # self.criterion2 = pytorch_ssim.SSIM()
    self.criterion3 = COS_Loss().to(device)
    self.criterion4 = VGG_Encoder()
    self.device = device
    self.writer = writer
    self.step = step

    # Two parameter groups, each driven by its own optimizer:
    # group 1 = normals / lights / shading branch, group 2 = reflectance branch.
    parameters1 = []
    parameters2 = []
    parameters1.append({'params': self.model.decomposer.encoder2.parameters(), 'lr': lr})
    parameters1.append({'params': self.model.decomposer.decoder_normals.parameters(), 'lr': lr})
    parameters1.append({'params': self.model.decomposer.decoder_lights.parameters(), 'lr': lr})
    parameters1.append({'params': self.model.shader.parameters(), 'lr': lr})
    parameters2.append({'params': self.model.decomposer.encoder1.parameters(), 'lr': lr})
    parameters2.append({'params': self.model.decomposer.decoder_reflectance.parameters(), 'lr': lr})
    # parameters1.append({'params': self.model.reflection.parameters(), 'lr': lr})
    # parameters2.append({'params': self.model.shader.parameters(), 'lr': lr})

    if optim_choose == 'adam':
        self.optimizer1 = optim.Adam(parameters1, lr=lr)
        self.optimizer2 = optim.Adam(parameters2, lr=lr)
    elif optim_choose == 'adabound':
        self.optimizer1 = adabound.AdaBound(parameters1, lr=lr, final_lr=0.1)
        self.optimizer2 = adabound.AdaBound(parameters2, lr=lr, final_lr=0.1)
def get_optimizer(cfg, model):
    # When fine-tuning, only optimize parameters that still require gradients;
    # otherwise optimize the full model. The dispatch below is identical either way.
    if cfg.MODEL.FINETUNE:
        params = filter(lambda p: p.requires_grad, model.parameters())
    else:
        params = model.parameters()

    optimizer = None
    if cfg.TRAIN.OPTIMIZER == 'sgd':
        optimizer = optim.SGD(params,
                              lr=cfg.TRAIN.LR,
                              momentum=cfg.TRAIN.MOMENTUM,
                              weight_decay=cfg.TRAIN.WD,
                              nesterov=cfg.TRAIN.NESTEROV)
    elif cfg.TRAIN.OPTIMIZER == 'adam':
        optimizer = optim.Adam(params, lr=cfg.TRAIN.LR)
    elif cfg.TRAIN.OPTIMIZER == 'adam_amsgrad':
        optimizer = optim.Adam(params, lr=cfg.TRAIN.LR, amsgrad=True)
    elif cfg.TRAIN.OPTIMIZER == 'adamw':
        optimizer = AdamW(params, lr=cfg.TRAIN.LR, weight_decay=1e-4)
    elif cfg.TRAIN.OPTIMIZER == 'adabound':
        optimizer = adabound.AdaBound(params, lr=cfg.TRAIN.LR, final_lr=0.1)
    return optimizer
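
# Usage sketch (illustrative, not from the source): driving get_optimizer(cfg, model)
# above with a minimal namespace config. SimpleNamespace stands in for whatever config
# class the project actually uses (e.g. a yacs CfgNode); the field names simply mirror
# the attributes read in get_optimizer. Assumes `optim` and `adabound` are imported as above.
import torch.nn as nn
from types import SimpleNamespace

_cfg = SimpleNamespace(
    MODEL=SimpleNamespace(FINETUNE=False),
    TRAIN=SimpleNamespace(OPTIMIZER='adabound', LR=1e-3,
                          MOMENTUM=0.9, WD=1e-4, NESTEROV=True),
)
_model = nn.Linear(16, 4)
_opt = get_optimizer(_cfg, _model)  # AdaBound starting at lr=1e-3, bounded toward final_lr=0.1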
def prep_optim(self):
    if self.args.optim == "adam":
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "adabound":
        self.optimizer = adabound.AdaBound(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "rmsprop":
        self.optimizer = optim.RMSprop(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "sgd":
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "bfgs":
        self.optimizer = optim.LBFGS(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "adamw":
        self.optimizer = optim.AdamW(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == "asgd":
        self.optimizer = optim.ASGD(self.model.parameters(), lr=self.args.lr)
    else:
        # A bare `raise` here would fail with "No active exception to re-raise",
        # so raise an explicit exception instead.
        raise ValueError("Invalid optimizer chosen: {}".format(self.args.optim))
def get_optimizer(trial, model_ft):
    # Let Optuna sample AdaBound's hyperparameters on a log scale.
    lr = trial.suggest_loguniform('learning_rate', 1e-7, 1e-3)
    final_lr = trial.suggest_loguniform('final_lr', 1e-5, 1e-1)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    optimizer_ft = adabound.AdaBound(model_ft.parameters(),
                                     lr=lr,
                                     final_lr=final_lr,
                                     betas=(0.9, 0.999),
                                     gamma=0.001,
                                     weight_decay=weight_decay)
    return optimizer_ft
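
# Usage sketch (illustrative, not from the source): how get_optimizer(trial, model_ft)
# above would typically be wired into an Optuna study. `build_model` and `evaluate`
# are hypothetical placeholders for the project's own model factory and training loop.
import optuna

def _objective(trial):
    model_ft = build_model()                      # hypothetical model factory
    optimizer_ft = get_optimizer(trial, model_ft)
    return evaluate(model_ft, optimizer_ft)       # hypothetical: returns validation loss

study = optuna.create_study(direction='minimize')
study.optimize(_objective, n_trials=20)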
def set_optimizer_g(model_g, lr, w_decay=0.0005):
    params = model_g.get_learnable_params()
    param_list = [p for k, p in params.items()]
    optimizer = adabound.AdaBound(param_list, lr=lr, weight_decay=w_decay)
    return optimizer
def set_optimizer(optim_type: str, net_params: Any, lr: float,
                  amsgrad_bool: bool = True) -> Any:
    if optim_type == "Adam":
        return optim.Adam(net_params, lr=lr, amsgrad=amsgrad_bool)
    elif optim_type == "adabound":
        return adabound.AdaBound(net_params, lr=lr)
    else:
        print("ERROR in set_optimizer")
        sys.exit()
def get_optimizer(name, parameters, lr):
    """
    This function instantiates optimizers given model parameters and various hyperparameters
    :param name: name of the optimizer to choose from
    :param parameters: all trainable model parameters
    :param lr: learning rate
    :return: optimizer object
    """
    if name == 'sgd':
        # TODO: test momentum and weight_decay
        # 1e-07 decay? weight_decay=1e-6
        return torch.optim.SGD(parameters, lr=lr, momentum=0.9, nesterov=True)
        # bad results: weight_decay=1e-07, momentum=0.9, nesterov=True
        # return torch.optim.SGD(parameters, lr=lr, weight_decay=1e-5, momentum=0.9, nesterov=True)
    elif name == "openai_adam":
        # This optimizer is from: https://github.com/huggingface/pytorch-openai-transformer-lm
        optimizer_openai = OpenAIAdam(
            parameters,
            lr=6.25e-5,
            schedule='warmup_linear',
            warmup=0.002,
            t_total=3,  # TODO: not sure what this actually represents
            b1=0.9,
            b2=0.999,
            e=1e-8,
            l2=0.01,
            vector_l2=True,
            max_grad_norm=1)
        return optimizer_openai
    elif name == 'nadam':
        return NAdam(parameters, lr=lr)
    elif name == 'asgd':
        return torch.optim.ASGD(parameters, lr=lr)
    elif name in ['adagrad', 'myadagrad']:
        # use new adagrad to allow for init accumulator value
        return MyAdagrad(parameters, lr=lr, init_accu_value=0.1)
    elif name == 'adam':
        # return torch.optim.Adam(parameters, betas=(0.9, 0.98), lr=lr, eps=1e-9)
        # TODO: set amsgrad=True
        return torch.optim.Adam(parameters)  # use default settings
    elif name == 'adamax':
        return torch.optim.Adamax(parameters, lr=lr)
    elif name == "noopt_nadam":
        return NAdam(parameters, lr=0, betas=(0.9, 0.98), eps=1e-9)
    elif name == "noopt_adam":
        # this comes from http://nlp.seas.harvard.edu/2018/04/03/attention.html
        # but with a modification of using a zero_grad function in the class
        return NoamOpt(
            360, 1, 200,
            torch.optim.Adam(parameters, lr=0, betas=(0.9, 0.98), eps=1e-9))
    elif name == "adabound":
        return adabound.AdaBound(parameters, lr=1e-3, final_lr=lr)  # lr=1e-3, final_lr=0.1 by default
    else:
        raise Exception("Unsupported optimizer: {}".format(name))
def get_optimizer(args, model):
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.base_lr,
                              momentum=args.momentum, weight_decay=args.wd)
    elif args.optimizer == 'nag':
        optimizer = optim.SGD(model.parameters(), lr=args.base_lr,
                              momentum=args.momentum, weight_decay=args.wd,
                              nesterov=True)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.base_lr, weight_decay=args.wd)
    elif args.optimizer == 'amsgrad':
        optimizer = optim.Adam(model.parameters(), lr=args.base_lr,
                               weight_decay=args.wd, amsgrad=True)
    elif args.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.base_lr,
                                  momentum=args.momentum, weight_decay=args.wd)
    elif args.optimizer == 'adabound':
        optimizer = adabound.AdaBound(model.parameters(), lr=args.base_lr,
                                      weight_decay=args.wd, final_lr=args.final_lr)
    elif args.optimizer == 'amsbound':
        optimizer = adabound.AdaBound(model.parameters(), lr=args.base_lr,
                                      weight_decay=args.wd, final_lr=args.final_lr,
                                      amsbound=True)
    else:
        # Fixed: raising a plain string is invalid in Python 3.
        raise ValueError('unknown optimizer: {}'.format(args.optimizer))
    return optimizer
def __init__(self):
    self.eval_net, self.target_net = Net(), Net()
    self.learn_step_counter = 0    # for target updating
    self.memory_counter = 0        # for storing memory
    self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))  # initialize memory
    # self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
    self.optimizer = adabound.AdaBound(self.eval_net.parameters(), lr=LR, final_lr=0.01)
    self.loss_func = nn.MSELoss()
def make_optimizer(args, targets):
    # optimizer = optim.Adam(targets.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    if args.optimizer == 'AdaBound':
        optimizer = adabound.AdaBound(targets.parameters(), lr=args.lr, final_lr=0.1)
    elif args.optimizer == 'AMSGrad':
        optimizer = optim.Adam(targets.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay, amsgrad=True)
    elif args.optimizer == 'AMSBound':
        optimizer = adabound.AdaBound(targets.parameters(), lr=args.lr,
                                      final_lr=0.1, amsbound=True)
    else:
        optimizer = optim.Adam(targets.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
    return optimizer
def get_optimizer(model, args):
    parameters = []
    for name, param in model.named_parameters():
        # bias params get zero weight_decay
        # bias_list = (param for name, param in model.named_parameters() if name[-4:] == 'bias')
        # others_list = (param for name, param in model.named_parameters() if name[-4:] != 'bias')
        # parameters = [{'parameters': bias_list, 'weight_decay': 0},
        #               {'parameters': others_list}]
        # trick => enlarge the learning rate of custom-layer params
        if 'fc' in name or 'class' in name or 'last_linear' in name or 'ca' in name or 'sa' in name:
            # trick => bias params do not use weight_decay
            if name[-4:] == 'bias':
                parameters.append({'params': param, 'lr': args.lr * args.lr_times, 'weight_decay': 0.0})
            else:
                parameters.append({'params': param, 'lr': args.lr * args.lr_times})
        else:
            # Fixed: the original checked name[:-4] == 'bias', which never matches bias params.
            if name[-4:] == 'bias':
                parameters.append({'params': param, 'lr': args.lr, 'weight_decay': 0.0})
            else:
                parameters.append({'params': param, 'lr': args.lr})

    if args.optimizer == 'sgd':
        return torch.optim.SGD(parameters, args.lr,
                               momentum=args.momentum,
                               nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(parameters, args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(parameters, args.lr,
                                betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        return adabound.AdaBound(parameters, lr=args.lr, final_lr=args.final_lr)
    elif args.optimizer == 'radam':
        return RAdam(parameters, lr=args.lr,
                     betas=(args.beta1, args.beta2),
                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
def get_optimizer(config: Config, model: nn.Module):
    params = model.parameters()
    if config.optimizer.lower() == "sgd":
        print(colored("Using SGD: lr is: {}, L2 regularization is: {}".format(
            config.learning_rate, config.l2), 'yellow'))
        return optim.SGD(params, lr=config.learning_rate, weight_decay=float(config.l2))
    elif config.optimizer.lower() == "adam":
        print(colored("Using Adam", 'yellow'))
        return optim.Adam(params)
    elif config.optimizer.lower() == "adabound":
        print(colored("Using adabound: lr is: {}, final lr is: {}".format(0.001, 0.1), 'yellow'))
        return adabound.AdaBound(params, lr=1e-3, final_lr=0.1)
    else:
        print("Illegal optimizer: {}".format(config.optimizer))
        exit(1)
def _load_optimizer(self):
    print('loading optimizer...')
    import torch.optim as optim
    if self.args.optimizer == 'adadelta':
        self.optimizer = optim.Adadelta(self.policies, lr=self.args.lr,
                                        weight_decay=self.args.wd)
    elif self.args.optimizer == 'adagrad':
        self.optimizer = optim.Adagrad(self.policies, lr=self.args.lr,
                                       weight_decay=self.args.wd)
    elif self.args.optimizer == 'adam':
        self.optimizer = optim.Adam(self.policies, lr=self.args.lr,
                                    weight_decay=self.args.wd)
    elif self.args.optimizer == 'rmsprop':
        self.optimizer = optim.RMSprop(self.policies, lr=self.args.lr,
                                       momentum=self.args.momentum,
                                       weight_decay=self.args.wd)
    elif self.args.optimizer == 'sgd':
        self.optimizer = optim.SGD(self.policies, lr=self.args.lr,
                                   momentum=self.args.momentum, dampening=0,
                                   nesterov=self.args.nesterov,
                                   weight_decay=self.args.wd)
    elif self.args.optimizer == 'adabound':
        import adabound
        self.optimizer = adabound.AdaBound(self.policies, lr=self.args.lr,
                                           final_lr=self.args.final_lr)

    if self.args.scheduler == 'step_lr':
        self.scheduler = optim.lr_scheduler.StepLR(
            self.optimizer, step_size=self.args.step_size, gamma=0.2, last_epoch=-1)
    elif self.args.scheduler == 'multi_step_lr':
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=self.args.milestones, gamma=0.2, last_epoch=-1)
    print('optimizer load finished!')
def get_optimizer(lr):
    if args.optim == 'adam':
        args.clearmomentum = True
        return torch.optim.Adam(model.parameters(), lr=lr)
    elif args.optim == 'sgd':
        args.clearmomentum = True
        return torch.optim.SGD(model.parameters(), lr=lr)
    elif args.optim == 'sgdm':
        args.clearmomentum = False
        return torch.optim.SGD(model.parameters(), lr=lr, momentum=0.85)
    elif args.optim == 'adabound':
        import adabound
        args.clearmomentum = False
        return adabound.AdaBound(model.parameters(), lr=lr)
    return None
def Start_Train_Local(args):
    # Build the net for localization
    args.model_choose = 'Localization_ResVNet'
    model_local = Build_Net(args)
    # Initialize the weights of the net
    model_local = weight_init(model_local)

    print("Start to load the SegThor Training Data for Localization")
    train_local_set = DataLoader_SegThor.SegThorDatasetLocalization(
        dataset_folder=args.data_folder, phase='train', zoom_size=[256, 256, 256])
    train_local_loader = DataLoader(train_local_set, batch_size=args.train_batch_size,
                                    shuffle=True, num_workers=1)
    print("Training Data Numbers: ", len(train_local_loader))

    print("Start to load the SegThor Validation Data for Localization")
    valid_local_set = DataLoader_SegThor.SegThorDatasetLocalization(
        args.data_folder, phase='valid', zoom_size=[256, 256, 256])
    valid_local_loader = DataLoader(valid_local_set, batch_size=args.valid_batch_size,
                                    shuffle=False, num_workers=1)
    print("Valid Data Numbers: ", len(valid_local_loader))

    # Define the optimizer
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(model_local.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optimizer == 'Adam':
        optimizer = optim.Adam(model_local.parameters(), lr=args.lr,
                               betas=(args.beta1, args.beta2), eps=1e-08,
                               weight_decay=0, amsgrad=False)
    elif args.optimizer == 'RmsProp':
        optimizer = optim.RMSprop(model_local.parameters(), weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        optimizer = adabound.AdaBound(params=model_local.parameters(), lr=args.lr, final_lr=0.1)

    # Start to train
    model_local.train()
    for epoch in range(1, args.max_epoch + 1):
        Train_Local(args, epoch, model_local, optimizer, train_local_loader)
        if epoch % 10 == 0:
            model_local.eval()
            Valid_Local(args, model_local, valid_local_loader)  # fixed: was `model`, which is undefined here
            model_local.train()
        if epoch % 50 == 0:
            # fixed: was `model_triplet`, which is not defined in this function
            torch.save(model_local.state_dict(),
                       '/mnt/lustrenew/zhangzhizuo/Model/{}__{}__{}__{}.pkl'.format(
                           args.model_choose,
                           time.strftime('%Y.%m.%d.%H.%I.%M.%S', time.localtime(time.time())),
                           epoch, 'Local'))

    # The block below referenced variables (save_path, seriesUID, origin, spacing, ...)
    # that are never defined in this function and would raise a NameError; it looks like
    # preprocessing code pasted in by mistake, so it is left commented out here.
    # folder = '/mnt/lustre/zhangzhizuo/Data/'
    # for file in os.listdir(folder):
    #     np.savez_compressed(os.path.join(save_path, seriesUID + '.npz'),
    #                         origin=origin, spacing_old=spacing, spacing_new=resolution,
    #                         image=re_img.astype(np.float16), mask=seg, seriesUID=seriesUID,
    #                         direction=isflip, pad=pad, bbox_old=originalshape,
    #                         bbox_new=re_img.shape)
    return
def make_optimizer(model: object, optimizer_cfg: object) -> object:
    if optimizer_cfg.type == 'sgd':
        LOG.info('\n Optimizer: SGD')
        return optim.SGD(model.parameters(), lr=optimizer_cfg.lr,
                         momentum=optimizer_cfg.momentum,
                         weight_decay=optimizer_cfg.decay)
    elif optimizer_cfg.type == 'adam':
        LOG.info('\n Optimizer: Adam')
        return optim.Adam(model.parameters(), lr=optimizer_cfg.lr,
                          betas=(optimizer_cfg.beta1, optimizer_cfg.beta2),
                          eps=optimizer_cfg.eps)
    elif optimizer_cfg.type == 'amsgrad':
        LOG.info('\n Optimizer: AMSGrad')
        return optim.Adam(model.parameters(), lr=optimizer_cfg.lr,
                          betas=(optimizer_cfg.beta1, optimizer_cfg.beta2),
                          eps=optimizer_cfg.eps, amsgrad=True)
    elif optimizer_cfg.type == 'rmsprop':
        LOG.info('\n Optimizer: RMSProp')
        return optim.RMSprop(model.parameters(), lr=optimizer_cfg.lr,
                             alpha=optimizer_cfg.alpha, eps=optimizer_cfg.eps)
    elif optimizer_cfg.type == "adadelta":
        LOG.info('\n Optimizer: Adadelta')
        return optim.Adadelta(model.parameters(), lr=optimizer_cfg.lr,
                              rho=optimizer_cfg.rho, eps=optimizer_cfg.eps,
                              weight_decay=optimizer_cfg.decay)
    elif optimizer_cfg.type == "adagrad":
        LOG.info('\n Optimizer: Adagrad')
        return optim.Adagrad(
            model.parameters(), lr=optimizer_cfg.lr,
            lr_decay=optimizer_cfg.lr_decay,
            weight_decay=optimizer_cfg.decay,
            initial_accumulator_value=optimizer_cfg.initial_accumulator_value)
    elif optimizer_cfg.type == "adabound":  # fixed: was `args.optimizer`, which is undefined here
        LOG.info('\n Optimizer: AdaBound')
        return adabound.AdaBound(model.parameters(), lr=optimizer_cfg.lr,
                                 final_lr=optimizer_cfg.flr)
    else:
        raise NotImplementedError('This optimizer is not supported.')
def get_optimizer(model, args):
    parameters = []
    for name, param in model.named_parameters():
        if 'fc' in name or 'class' in name or 'last_linear' in name or 'ca' in name or 'sa' in name:
            parameters.append({'params': param, 'lr': args.lr * args.lr_fc_times})
        else:
            parameters.append({'params': param, 'lr': args.lr})

    if args.optimizer == 'sgd':
        return torch.optim.SGD(parameters, args.lr,
                               momentum=args.momentum,
                               nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(parameters, args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(parameters, args.lr,
                                betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        return adabound.AdaBound(parameters, lr=args.lr, final_lr=args.final_lr)
    elif args.optimizer == 'radam':
        return RAdam(parameters, lr=args.lr,
                     betas=(args.beta1, args.beta2),
                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
def Start_Train_Multi(args):
    args.model_choose = 'ResVNet_Triplet'
    model_triplet = Build_Net(args)
    model_triplet = kaiming_normal(model_triplet)
    # model.apply(weights_init)

    # Loading data in the dataset
    print("Start to load the SegThor Training Data for Triplet!")
    args.data_folder = "/mnt/lustre/zhangzhizuo/Data/SegThor"
    data_folder = args.data_folder
    train_set = SegThorDatasetTriplet(dataset_folder=data_folder, phase='train',
                                      crop_size=args.crop_size_triplet)
    train_loader = DataLoader(train_set, batch_size=args.train_batch_size,
                              shuffle=True, num_workers=1)
    print("Training Data Numbers: ", len(train_loader.dataset))

    print("Start to load the SegThor Validation Data")
    valid_set = SegThorDatasetTriplet(data_folder, phase='valid',
                                      crop_size=args.crop_size_triplet)
    valid_loader = DataLoader(valid_set, batch_size=args.valid_batch_size,
                              shuffle=False, num_workers=1)
    print("Valid Data Numbers: ", len(valid_loader.dataset))

    # Build the model for Heart training
    # Define the optimizer
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(model_triplet.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optimizer == 'Adam':
        optimizer = optim.Adam(model_triplet.parameters(), lr=args.lr,
                               betas=(args.beta1, args.beta2), eps=1e-08,
                               weight_decay=0, amsgrad=False)
    elif args.optimizer == 'RmsProp':
        optimizer = optim.RMSprop(model_triplet.parameters(), weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        optimizer = adabound.AdaBound(params=model_triplet.parameters(), lr=args.lr, final_lr=0.1)

    model_triplet.train()
    for epoch in range(1, args.max_epoch + 1):
        Train_Triplet(args, epoch, model_triplet, optimizer, train_loader,
                      total_classes=4, target_class=-1)
        if epoch % 5 == 0:
            model_triplet.eval()
            Valid_Triplet(args, model_triplet, valid_loader, target_class=-1, total_classes=4)
            model_triplet.train()
        if epoch % 50 == 0:
            torch.save(model_triplet.state_dict(),
                       '/mnt/lustrenew/zhangzhizuo/Model/{}__{}__{}__{}.pkl'.format(
                           args.model_choose,
                           time.strftime('%Y.%m.%d.%H.%I.%M.%S', time.localtime(time.time())),
                           epoch, 'Triplet'))
    return
def get_optim(params, config):
    if config.type == 'adam':
        optimizer = torch.optim.Adam(params, lr=config.lr, betas=config.betas)
    elif config.type == 'adabound':
        import adabound
        optimizer = adabound.AdaBound(params, lr=config.lr, final_lr=config.final_lr)
    elif config.type == 'sgd':
        optimizer = torch.optim.SGD(params, lr=config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay,
                                    nesterov=config.nesterov)
    else:
        # fixed: the message previously referenced config.optimizer, but the dispatch uses config.type
        raise Exception("Optimizer not supported: %s" % config.type)
    return optimizer
def create_model(workspace, dataset, num_classes, model_name, freeze, dropout, prob,
                 lr, momentum, step_size, gamma, criterion_name, optimizer_name,
                 device, dataparallel, gpus):
    if model_name == 'ResNet50' or model_name == 'ResNeXt50' \
            or model_name == 'Inception_v3' or model_name == 'DenseNet161':
        model = recall_model(model_name)
        model = edit_model(model_name, model, dropout, prob, freeze, num_classes)
        transform = create_transform(model_name)
    elif len(model_name) == 2:
        transform = []
        model1 = recall_model(model_name[0])
        model1 = edit_model(model_name[0], model1, dropout, prob, freeze, 1024)
        transform.append(create_transform(model_name[0]))
        model2 = recall_model(model_name[1])
        model2 = edit_model(model_name[1], model2, dropout, prob, freeze, 1024)
        transform.append(create_transform(model_name[1]))
        model = TwoInputsNet(model1, model2, num_classes)

    if dataparallel:
        model = torch.nn.DataParallel(model, device_ids=gpus)

    if optimizer_name == 'SGD':
        optimizer_conv = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    elif optimizer_name == 'AdamW':
        optimizer_conv = optim.AdamW(model.parameters(), lr=lr)
    elif optimizer_name == 'Adam':
        optimizer_conv = optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == 'AdaBound':
        optimizer_conv = adabound.AdaBound(model.parameters(), lr=lr)

    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=step_size, gamma=gamma)

    if criterion_name == 'Cross Entropy':
        criterion = nn.CrossEntropyLoss()
    elif criterion_name == 'Focal':
        criterion = FocalLoss()

    return model, optimizer_conv, criterion, exp_lr_scheduler, transform
def configure_optimizers(self):
    params = self.hparams
    if params.optimizer == 'sgd':
        optimizer = torch_optim.SGD(self.parameters(), lr=params.lr,
                                    weight_decay=params.weight_decay, momentum=0.9)
    elif params.optimizer == 'adam':
        optimizer = torch_optim.Adam(self.parameters(), lr=params.lr,
                                     weight_decay=params.weight_decay)
    elif params.optimizer == 'adabound':
        import adabound
        optimizer = adabound.AdaBound(self.parameters(), lr=params.lr,
                                      final_lr=params.lr * 10,
                                      weight_decay=params.weight_decay)
    else:
        raise NotImplementedError()

    if params.sched == 'plat':
        sched = lr_sched.ReduceLROnPlateau(optimizer, patience=0,
                                           factor=params.sched_factor,
                                           verbose=True, min_lr=0.0004)
        return [optimizer], [sched]
    elif self.hparams.sched == 'sgdr':
        sched = lr_sched.CosineAnnealingWarmRestarts(optimizer, self.hparams.sched_factor)
        return [optimizer], [sched]
    elif self.hparams.sched == 'step':
        sched = lr_sched.MultiStepLR(optimizer, milestones=[3, 6], gamma=0.3)
        return [optimizer], [sched]
    elif params.sched == 'none':
        return optimizer
    else:
        raise NotImplementedError()
def get_optimizer(model, optim_str):
    torch_optim_list = ['SGD', 'Adam']
    possible_optim_list = torch_optim_list + ['RAdam', 'AdaBound']

    optim_args = optim_str.split('/')
    name = optim_args[0]
    assert name in possible_optim_list, '{} not implemented.'.format(name)
    args_dict = {e.split('=')[0]: eval(e.split('=')[1]) for e in optim_args[1:]}

    model_params = model.parameters()
    if name in torch_optim_list:
        optimizer = optim.__dict__[name](model_params, **args_dict)
    elif name == 'AdaBound':
        optimizer = adabound.AdaBound(model_params, **args_dict)
    elif name == 'RAdam':
        optimizer = RAdam(model_params, **args_dict)
    return optimizer
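
# Usage sketch (illustrative, not from the source): get_optimizer above expects an
# optim_str of the form '<Name>/<kwarg>=<value>/...', with each value passed through
# eval(). The strings and the small module below are examples of that format only.
import torch.nn as nn

_net = nn.Linear(8, 2)
_sgd = get_optimizer(_net, 'SGD/lr=0.1/momentum=0.9/nesterov=True')
_adb = get_optimizer(_net, 'AdaBound/lr=1e-3/final_lr=0.1')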
def optuna_optimizer(trial, model, args):
    """A function for optimizing optimizers using Optuna.

    Arguments:
        trial: Optuna related. Details: https://optuna.readthedocs.io/en/latest/
        model: a model class
        args: argparse arguments.

    Returns:
        optimizer: optimizer of choice. Optuna is define-by-run and it optimizes
            for the optimizer with parameters if you define like below.
    """
    optimizer_names = ['adam', 'momentum', 'adabound']
    optimizer_name = trial.suggest_categorical('optim_type', optimizer_names)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-8, 1e-3)
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)

    if optimizer_name == optimizer_names[0]:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=weight_decay, amsgrad=True)
    elif optimizer_name == optimizer_names[1]:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                    weight_decay=weight_decay)
    elif optimizer_name == optimizer_names[2]:
        optimizer = adabound.AdaBound(model.parameters(), lr=lr, weight_decay=weight_decay)
    else:
        # Fixed: the original did `raise ("...")`, which fails because a plain string
        # is not an exception instance.
        raise ValueError(
            "The optimizer type not defined. Double check the configuration file.")
    return optimizer
def create_optimizer(params, mode, *args, **kwargs):
    if mode == 'SGD':
        opt = optim.SGD(params, *args, momentum=0., **kwargs)
    elif mode.startswith('nesterov'):
        # e.g. mode == 'nesterov0.9' -> Nesterov SGD with momentum 0.9
        momentum = float(mode[len('nesterov'):])
        opt = optim.SGD(params, *args, momentum=momentum, nesterov=True, **kwargs)
    elif mode.lower() == 'adam':
        betas = kwargs.pop('betas', (.9, .999))
        opt = optim.Adam(params, *args, betas=betas, amsgrad=True, weight_decay=1e-4, **kwargs)
    elif mode.lower() == 'adam_hyp2':
        betas = kwargs.pop('betas', (.5, .99))
        opt = optim.Adam(params, *args, betas=betas, amsgrad=True, **kwargs)
    elif mode.lower() == 'adam_hyp3':
        betas = kwargs.pop('betas', (0., .99))
        opt = optim.Adam(params, *args, betas=betas, amsgrad=True, **kwargs)
    elif mode.lower() == 'adam_sparse':
        betas = kwargs.pop('betas', (.9, .999))
        # Fixed: torch.optim.SparseAdam has no weight_decay argument, so the original
        # weight_decay=1e-4 would raise a TypeError.
        opt = optim.SparseAdam(params, *args, betas=betas)
    elif mode.lower() == 'adam_sparse_hyp2':
        betas = kwargs.pop('betas', (.5, .99))
        opt = optim.SparseAdam(params, *args, betas=betas)
    elif mode.lower() == 'adam_sparse_hyp3':
        betas = kwargs.pop('betas', (.0, .99))
        opt = optim.SparseAdam(params, *args, betas=betas)
    elif mode.lower() == 'adabound':
        opt = adabound.AdaBound(params, *args, final_lr=0.1)
    else:
        raise NotImplementedError()
    return opt
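
# Usage sketch (illustrative, not from the source): the `mode` strings understood by
# create_optimizer above, including the 'nesterov<momentum>' form parsed with float().
import torch.nn as nn

_params = nn.Linear(8, 2).parameters()
_opt = create_optimizer(_params, 'nesterov0.9', lr=0.1)  # SGD, momentum=0.9, nesterov=True
# other accepted modes: 'SGD', 'adam', 'adam_hyp2', 'adam_hyp3',
# 'adam_sparse', 'adam_sparse_hyp2', 'adam_sparse_hyp3', 'adabound'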
def get_optimizer(model, args):
    if args.optimizer == 'sgd':
        return torch.optim.SGD(model.parameters(), args.lr,
                               momentum=args.momentum,
                               nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(model.parameters(), args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(model.parameters(), args.lr,
                                betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        return adabound.AdaBound(model.parameters(), lr=args.lr, final_lr=args.final_lr)
    elif args.optimizer == 'radam':
        return RAdam(model.parameters(), lr=args.lr,
                     betas=(args.beta1, args.beta2),
                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
    return l_list, t_list


if __name__ == "__main__":
    Option = TrainOptions()
    args = Option.parse()
    Option.print_opt()

    model = IAQA_model(args)
    device = torch.device("cuda" if args.gpu and torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.apply(inplace_relu)

    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-3)
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    # AdaBound: Adam-style updates with learning rates bounded so they converge toward SGD
    # (the original comment said "Adaboost + SGD", which is incorrect)
    optimizer = adabound.AdaBound(model.parameters(), lr=args.lr, final_lr=0.1)
    criterion = RegressionLoss()

    tensorboard_dir = os.path.join(args.runs, args.name)
    writer = SummaryWriter(tensorboard_dir)
    checkpoint_path = os.path.join(args.checkpoints_dir, args.name)
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    checkpoints = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth')

    best_criterion = -1
    lr = args.lr
    train_loader, val_loader = get_train_dataloader(args)
    test_loader = get_test_dataloader(args)

    for epoch in range(1, args.epochs + 1):
        for inputs, labels in self.dataloaders['valid']:
            inputs = inputs.type(self.input_type)
            self.writer.add_graph(self.model, inputs)
            break
        print('Model structure drawing is completed')


# model = Net(weight='resnet152_101.pth')
model = pretrainedmodels.__dict__['senet154'](num_classes=1000, pretrained='imagenet')
model.last_linear = nn.Linear(model.last_linear.in_features, 13051)
# self.model.load_state_dict(torch.load('best.pth'))
criterion = nn.CrossEntropyLoss()
optimizer = adabound.AdaBound(model.parameters(), lr=1e-2, final_lr=0.1,
                              weight_decay=1e-7, amsbound=True)
# optimizer = optim.Adam(model.parameters(), lr=1e-3)
# scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
S = Solution(
    model,
    criterion,
    optimizer,
    './cars_data',
    scheduler=None,
    epochs=100,
    batch_size=8,
    thread_size=num_cpu,
    show_interval=20,
    valid_interval=2000,
train_fields = [('id', None), ('sentiment', LABEL), ('review', TEXT)]
test_fields = [('id', None), ('review', TEXT)]
x_var = 'review'
y_vars = ['sentiment']
x_test_var = 'review'

nonbertdata = NonBertData(train_pth, train_fields, test_pth, test_fields,
                          TEXT, LABEL, x_var, y_vars, x_test_var, None)
train_iter, test_iter = nonbertdata.getdata()

model = MyBert(len(TEXT.vocab), TEXT.vocab.vectors)
# a = list(model.parameters())
# optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
######################################################
import adabound
optimizer = adabound.AdaBound(model.parameters(), lr=2e-5, final_lr=0.1)
#######################################################

train_test = True
modelname = "./models_pkl/Bert_4layers_MODEL.pkl"
if train_test:
    trainer = Trainer(model, train_iter, optimizer, modelname, use_GPU=True)
    trainer.train(1000, pretrain_pth=modelname, padding_idx=1)
else:
    tester = Tester(model, test_iter, use_GPU=True)
    df = tester.test(modelname, padding_idx=1)
    data = pd.read_csv(test_pth, header=0, delimiter='\t')
    model_params['word_embedding.weight'][idx] = torch.FloatTensor(
        word_embedding.wv[word_embedding.wv.index2entity[idx - 3]])

k_means_weight = np.load('%s/%s.k_means.npy' % (conf.target_path, conf.data_name))
model_params['transform_T.weight'] = torch.FloatTensor(
    k_means_weight.transpose())  # (aspect_dimesion, word_dimension)
model.load_state_dict(model_params)
'''
model.load_state_dict(
    torch.load('%s/train_%s_abae_id_adabound.mod' % (conf.model_path, conf.data_name)))
model.cuda()

# optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)
import adabound
optimizer = adabound.AdaBound(model.parameters(), lr=conf.learning_rate, final_lr=0.1)

########################### FIRST TRAINING #####################################
check_dir('%s/train_%s_abae_id_x.log' % (conf.out_path, conf.data_name))
log = Logging('%s/train_%s_abae_id_adabound.log' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_abae_id_adabound.mod' % (conf.out_path, conf.data_name)

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data, train_review_embedding)
val_dataset = data_utils.TrainData(val_data, val_review_embedding)
test_dataset = data_utils.TrainData(test_data, test_review_embedding)

train_batch_sampler = data.BatchSampler(data.RandomSampler(
def engine(
    ckpt_loc: str = 'ckpt-default',
    device_id: int = 1,
    batch_size: int = 128,
    use_cuda: bool = True,
    num_embeddings: int = 8,
    casual_hidden_sizes: t.Iterable = [16, 32],
    num_botnec_feat: int = 72,
    num_k_feat: int = 24,
    num_dense_layers: int = 20,
    num_out_feat: int = 268,
    num_z_feat: int = 10,
    activation: str = 'elu',
    LR: float = 1e-3,
    final_lr: float = 0.1,
    init_beta: float = 0.,
    final_beta: float = 1.,
    num_annealing_steps: int = 2000,
    # beta: float=0.25,
    grad_clip=3.0,
    num_epochs: int = 5,
    num_p=1
):
    beta_step_len = (final_beta - init_beta) / num_annealing_steps
    model = GraphInf(
        num_in_feat=43,
        num_c_feat=8,
        num_embeddings=num_embeddings,
        casual_hidden_sizes=casual_hidden_sizes,
        num_botnec_feat=num_botnec_feat,  # 16 x 4
        num_k_feat=num_k_feat,  # 16
        num_dense_layers=num_dense_layers,
        num_out_feat=num_out_feat,
        num_z_feat=num_z_feat,
        activation=activation,
        use_cuda=use_cuda
    )
    optim = adabound.AdaBound(
        model.parameters(),
        lr=LR,
        final_lr=final_lr
    )

    device = torch.device(f'cuda:{device_id}')
    model = model.to(device)
    model.train()

    save_loc = path.join(path.dirname(__file__), 'ckpt', ckpt_loc)
    events_loc = path.join(save_loc, 'events')
    if not path.exists(events_loc):
        makedirs(events_loc)

    try:
        with SummaryWriter(events_loc) as writer:
            step = 0
            has_nan_or_inf = False
            train_loader = ComLoader(
                original_scaffolds_file='data-center/train.smi',
                num_workers=num_p,
                batch_size=batch_size
            )
            test_loader = ComLoader(
                original_scaffolds_file='data-center/test.smi',
                num_workers=num_p,
                batch_size=batch_size
            )

            for epoch in ipb(range(num_epochs), desc='epochs'):
                iter_train = iter(train_loader)
                iter_test = iter(test_loader)
                try:
                    if has_nan_or_inf:
                        break
                    for i in ipb(
                        range(train_loader.num_id_block + train_loader.num_id_block // 200),
                        desc='iteration'
                    ):
                        if step > 0 and step % 200 == 0:
                            batch = next(iter_test)
                        else:
                            batch = next(iter_train)
                        (block, nums_nodes, nums_edges, seg_ids,
                         bond_info_all, nodes_o, nodes_c) = batch

                        beta = min(init_beta + beta_step_len * step, 1)

                        num_N = sum(nums_nodes)
                        num_E = sum(nums_edges)
                        values = torch.ones(num_E)
                        s_adj = torch.sparse_coo_tensor(
                            bond_info_all.T,
                            values,
                            torch.Size([num_N, num_N])
                        ).to(device)
                        s_nfeat = torch.from_numpy(nodes_o).to(device)
                        c_nfeat = torch.from_numpy(nodes_c).to(device)

                        x_recon, mu1, logvar1, mu2, logvar2 = model(s_nfeat, c_nfeat, s_adj)
                        seg_ids = torch.from_numpy(seg_ids)

                        optim.zero_grad()
                        MSE, KL = loss_func(x_recon, s_nfeat, mu1, logvar1, mu2, logvar2, seg_ids)
                        loss = MSE + beta * KL
                        if not (step > 0 and step % 200 == 0):
                            loss.backward()
                            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
                            optim.step()

                            # debug for Nan in recon loss
                            has_nan_or_inf = torch.cat(
                                [
                                    torch.stack(
                                        (
                                            torch.isnan(params.grad).any(),
                                            torch.isinf(params.grad).any()
                                        ),
                                        dim=-1
                                    )
                                    for params in model.parameters()
                                ],
                                dim=-1
                            ).any()
                            if has_nan_or_inf:
                                torch.save(model, path.join(save_loc, f'broken_{epoch}.ckpt'))
                                torch.save(s_nfeat, path.join(save_loc, f's_nfeat_{epoch}.pt'))
                                torch.save(c_nfeat, path.join(save_loc, f'c_nfeat_{epoch}.pt'))
                                torch.save(s_adj.to_dense(), path.join(save_loc, f's_adj_{epoch}.pt'))
                                torch.save(seg_ids, path.join(save_loc, f'seg_ids_{epoch}.pt'))
                                with open(path.join(save_loc, 'batch.smi'), 'w') as f:
                                    for smiles in block:
                                        f.write(smiles + '\n')
                                break

                        if not (step > 0 and step % 200 == 0):
                            writer.add_scalar('loss', loss.cpu().item(), step)
                            writer.add_scalar('recon_loss', MSE.cpu().item(), step)
                            writer.add_scalar('KL', KL.cpu().item(), step)
                        else:
                            writer.add_scalar('test_loss', loss.cpu().item(), step)
                            writer.add_scalar('test_recon_loss', MSE.cpu().item(), step)
                            writer.add_scalar('test_KL', KL.cpu().item(), step)
                        step += 1

                    torch.save(model, path.join(save_loc, f'model_{epoch}.ckpt'))
                except StopIteration:
                    continue
    except KeyboardInterrupt:
        torch.save(model, path.join(save_loc, f'model_{epoch}.ckpt'))