def __init__(self, dataset=None, model_name=None, model_params=None,
             trainer_params=None, restore=None, device=None,
             pretrained_embeddings_path=None, tokenizer_path=None,
             load_external_dataset=None):
    self.graph_model = model_name(dataset.g, **model_params).to(device)
    self.model_params = model_params
    self.trainer_params = trainer_params
    self.device = device
    self.epoch = 0
    self.restore_epoch = 0
    self.batch = 0
    self.dtype = torch.float32

    if load_external_dataset is not None:
        logging.info("Loading external dataset")
        external_args, external_dataset = load_external_dataset()
        self.graph_model.g = external_dataset.g
        dataset = external_dataset

    self.create_node_embedder(
        dataset, tokenizer_path,
        n_dims=model_params["h_dim"],
        pretrained_path=pretrained_embeddings_path,
        n_buckets=trainer_params["embedding_table_size"])

    self.create_objectives(dataset, tokenizer_path)

    if restore:
        self.restore_from_checkpoint(self.model_base_path)

    if load_external_dataset is not None:
        self.trainer_params["model_base_path"] = external_args.external_model_base

    self._create_optimizer()
    self.lr_scheduler = ExponentialLR(self.optimizer, gamma=1.0)
    # self.lr_scheduler = ReduceLROnPlateau(self.optimizer, patience=10, cooldown=20)
    self.summary_writer = SummaryWriter(self.model_base_path)
def build_optimizer(self, epochs, trn, gradient_accumulation, **kwargs):
    config = self.config
    model = self.model
    if isinstance(model, nn.DataParallel):
        model = model.module
    if self.config.transformer:
        transformer = model.encoder.transformer
        optimizer = Adam(set(model.parameters()) - set(transformer.parameters()),
                         config.lr, (config.mu, config.nu), config.epsilon)
        if self.config.transformer_lr:
            num_training_steps = len(trn) * epochs // gradient_accumulation
            if self.config.separate_optimizer:
                transformer_optimizer, transformer_scheduler = \
                    build_optimizer_scheduler_with_transformer(transformer, transformer,
                                                               config.transformer_lr, config.transformer_lr,
                                                               num_training_steps, config.warmup_steps,
                                                               config.weight_decay, adam_epsilon=1e-8)
            else:
                optimizer, scheduler = build_optimizer_scheduler_with_transformer(
                    model, transformer,
                    config.lr, config.transformer_lr,
                    num_training_steps, config.warmup_steps,
                    config.weight_decay, adam_epsilon=1e-8)
                transformer_optimizer, transformer_scheduler = None, None
        else:
            transformer.requires_grad_(False)
            transformer_optimizer, transformer_scheduler = None, None
    else:
        optimizer = Adam(model.parameters(), config.lr, (config.mu, config.nu), config.epsilon)
        transformer_optimizer, transformer_scheduler = None, None
    if self.config.separate_optimizer:
        scheduler = ExponentialLR(optimizer, config.decay ** (1 / config.decay_steps))
    # noinspection PyUnboundLocalVariable
    return optimizer, scheduler, transformer_optimizer, transformer_scheduler
def train(ds, val_ds, fold, train_idx, val_idx, config, num_workers=0, transforms=None,
          val_transforms=None, num_channels_changed=False, final_changed=False, cycle=False):
    os.makedirs(os.path.join('..', 'weights'), exist_ok=True)
    os.makedirs(os.path.join('..', 'logs'), exist_ok=True)
    save_path = os.path.join('..', 'weights', config.folder)
    model = models[config.network](num_classes=config.num_classes, num_channels=config.num_channels)
    estimator = Estimator(model, optimizers[config.optimizer], save_path,
                          config=config, num_channels_changed=num_channels_changed,
                          final_changed=final_changed)
    estimator.lr_scheduler = ExponentialLR(estimator.optimizer, config.lr_gamma)
    # estimator.lr_scheduler = LRStepScheduler(estimator.optimizer, config.lr_steps)
    callbacks = [
        ModelSaver(1, "fold" + str(fold) + "_best.pth", best_only=True),
        ModelSaver(1, "fold" + str(fold) + "_last.pth", best_only=False),
        CheckpointSaver(1, "fold" + str(fold) + "_checkpoint.pth"),
        # LRDropCheckpointSaver("fold" + str(fold) + "_checkpoint_e{epoch}.pth"),
        ModelFreezer(),
        # EarlyStopper(10),
        TensorBoard(os.path.join('..', 'logs', config.folder, 'fold{}'.format(fold))),
    ]
    # if not num_channels_changed:
    #     callbacks.append(LastCheckpointSaver("fold" + str(fold) + "_checkpoint_rgb.pth", config.nb_epoch))
    hard_neg_miner = None  # HardNegativeMiner(rate=10)
    # metrics = [('dr', dice_round)]
    trainer = PytorchTrain(estimator, fold=fold, callbacks=callbacks, hard_negative_miner=hard_neg_miner)
    train_loader = PytorchDataLoader(TrainDataset(ds, train_idx, config, transforms=transforms),
                                     batch_size=config.batch_size, shuffle=True, drop_last=True,
                                     num_workers=num_workers, pin_memory=True)
    val_loader = PytorchDataLoader(ValDataset(val_ds, val_idx, config, transforms=val_transforms),
                                   batch_size=1, shuffle=False, drop_last=False,
                                   num_workers=num_workers, pin_memory=True)
    trainer.fit(train_loader, val_loader, config.nb_epoch)
def train_setup(model, lr=1e-3, momentum=0.9, weight_decay=0.001, nesterov=True, gamma=0.975):
    """
    train_setup(model, lr=1e-3, momentum=0.9, weight_decay=0.001, nesterov=True, gamma=0.975)

    Inputs
    ------
    model : nn.Module
    lr : float
        Default: 1e-3
    momentum : float
        Default: 0.9
    weight_decay : float
        Default: 0.001
    nesterov : bool
        Default: True
    gamma : float
        Default: 0.975

    Outputs
    -------
    criterion : torch.nn.CrossEntropyLoss
        Cross-entropy loss
    optimizer : torch.optim.SGD
        Stochastic Gradient Descent implementation with bells and whistles
    scheduler : lr_scheduler.ExponentialLR
        Learning rate annealer
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=weight_decay,
        nesterov=nesterov,
    )
    scheduler = ExponentialLR(optimizer, gamma=gamma)
    return criterion, optimizer, scheduler
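# A minimal usage sketch for train_setup above: `example_model`, `train_loader`,
# and `device` are hypothetical stand-ins, not names from this codebase; the
# per-epoch scheduler.step() is the usual way to advance ExponentialLR.
def example_train_setup_usage(example_model, train_loader, device, epochs=10):
    criterion, optimizer, scheduler = train_setup(example_model, lr=1e-3, gamma=0.975)
    for epoch in range(epochs):
        example_model.train()
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            loss = criterion(example_model(inputs), targets)
            loss.backward()
            optimizer.step()
        # after each epoch the learning rate becomes lr * gamma ** (epoch + 1)
        scheduler.step()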
def configure_optimizers(self):
    if self.args.optimizer == 'AdamW':
        optimizer = AdamW(self.parameters(), lr=self.args.lr)
    elif self.args.optimizer == 'AdamP':
        from adamp import AdamP
        optimizer = AdamP(self.parameters(), lr=self.args.lr)
    else:
        raise NotImplementedError('Only AdamW and AdamP are supported!')

    if self.args.lr_scheduler == 'cos':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2)
    elif self.args.lr_scheduler == 'exp':
        scheduler = ExponentialLR(optimizer, gamma=0.5)
    else:
        raise NotImplementedError('Only cos and exp lr schedulers are supported!')

    # Lightning expects the scheduler under the 'lr_scheduler' key.
    return {
        'optimizer': optimizer,
        'lr_scheduler': scheduler,
    }
def init_model(self):
    self.print_config()
    type_mask = self.build_type_mask()
    loss = CrossEntropyLoss(reduction='none')
    vocab_size = len(self._oov_frag_list)
    batch_per_gpu = int(self._emb_size / self._num_gpu)
    model = LSTM(vocab_size, self._emb_size, type_mask, loss, batch_per_gpu)
    model.cuda()
    optimizer = SGD(model.parameters(), lr=self._lr, momentum=self._momentum,
                    weight_decay=self._weight_decay)
    scheduler = ExponentialLR(optimizer, gamma=self._gamma)
    return model, optimizer, scheduler
def get_triplet_loss(self, cfg):
    self.triplet = TripletLoss(cfg.LOSS.MARGIN, learning_weight=False)
    if cfg.MODEL.DEVICE == 'cuda':
        self.triplet = self.triplet.cuda()
    if self.triplet.learning_weight:
        self.triplet.optimizer = torch.optim.SGD(self.triplet.parameters(),
                                                 lr=0.0001,
                                                 momentum=0.9,
                                                 weight_decay=10 ** -4,
                                                 nesterov=True)
        self.triplet.scheduler = ExponentialLR(self.triplet.optimizer, gamma=0.95, last_epoch=-1)

    def loss_function(data: Data):
        return cfg.LOSS.METRIC_LOSS_WEIGHT * self.triplet(data.feat_t, data.cls_label)

    return loss_function
def main():
    # Training settings
    use_cuda = torch.cuda.is_available()
    torch.manual_seed(1234)
    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {'batch_size': 16}
    test_kwargs = {'batch_size': 16}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1, 'pin_memory': True, 'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    batch_size = 32
    lr = 5e-3
    gamma = 0.95
    epochs = 100
    log_interval = 20

    dataset1 = Dataset(PATH_TO_DATA, part_to_train=0.8)
    dataset2 = Dataset(PATH_TO_DATA, is_train=False, part_to_train=0.9)
    train_loader = torch.utils.data.DataLoader(dataset1, batch_size=batch_size, num_workers=1)
    test_loader = torch.utils.data.DataLoader(dataset2, batch_size=batch_size, num_workers=1)

    model = UNet(125, 257).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = ExponentialLR(optimizer, gamma=gamma)

    for epoch in range(1, epochs + 1):
        print('Training...')
        train(log_interval, model, device, train_loader, optimizer, epoch)
        print('Testing...')
        test(model, device, test_loader)
        scheduler.step()

    torch.save(model.state_dict(), "eeg_msc.pt")
def __init__(self, in_dim, hid_dim, T, lr, lr_gamma=0.95):
    super(Q_Fun, self).__init__()
    self.in_dim = in_dim
    self.hid_dim = hid_dim
    Linear = partial(nn.Linear, bias=False)
    self.lin5 = Linear(2 * hid_dim, 1)
    self.lin6 = Linear(hid_dim, hid_dim)
    self.lin7 = Linear(hid_dim, hid_dim)
    self.S2Vs = nn.ModuleList([S2V(in_dim=in_dim, out_dim=hid_dim)])
    for i in range(T - 1):
        self.S2Vs.append(S2V(hid_dim, hid_dim))
    self.loss = nn.MSELoss()
    self.optimizer = optim.Adam(self.parameters(), lr=lr)
    self.scheduler = ExponentialLR(self.optimizer, gamma=lr_gamma)
    self.device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    self.to(self.device)
def test_compound_cosanneal_and_exp_lr(self):
    epochs = 10
    eta_min = 1e-10
    single_targets = [
        eta_min + (0.05 - eta_min) * (1 + math.cos(math.pi * x / epochs)) / 2
        for x in range(epochs)
    ]
    multipliers = [0.1 ** i for i in range(epochs)]
    single_targets = [x * y for x, y in zip(single_targets, multipliers)]
    targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
    schedulers = [None] * 2
    schedulers[0] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
    schedulers[1] = ExponentialLR(self.opt, gamma=0.1)
    self._test(schedulers, targets, epochs)
def get_optim(args, model):
    assert args.optimizer in optim_choices

    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               betas=(args.momentum, args.momentum_sqr))
    elif args.optimizer == 'adamax':
        optimizer = optim.Adamax(model.parameters(), lr=args.lr,
                                 betas=(args.momentum, args.momentum_sqr))

    if args.warmup is not None:
        scheduler_iter = LinearWarmupScheduler(optimizer, total_epoch=args.warmup)
    else:
        scheduler_iter = None
    scheduler_epoch = ExponentialLR(optimizer, gamma=args.gamma)

    return optimizer, scheduler_iter, scheduler_epoch
def test_scheduler_with_param_groups():
    def _test(lr_scheduler, optimizer):
        num_iterations = 10
        max_epochs = 20

        state_dict = lr_scheduler.state_dict()

        trainer = Engine(lambda engine, batch: None)

        @trainer.on(Events.ITERATION_COMPLETED)
        def save_lr():
            lrs.append((optimizer.param_groups[0]["lr"], optimizer.param_groups[1]["lr"]))

        trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler)

        data = [0] * num_iterations

        for _ in range(2):
            lrs = []
            trainer.run(data, max_epochs=max_epochs)
            assert [lr[0] for lr in lrs] == pytest.approx([lr[1] for lr in lrs])
            lr_scheduler.load_state_dict(state_dict)

    t1 = torch.zeros([1], requires_grad=True)
    t2 = torch.zeros([1], requires_grad=True)
    optimizer = torch.optim.SGD([{"params": t1, "lr": 0.1}, {"params": t2, "lr": 0.1}])

    lr_scheduler = LinearCyclicalScheduler(optimizer, "lr", start_value=1.0, end_value=0.0, cycle_size=10)
    _test(lr_scheduler, optimizer)

    lr_scheduler = PiecewiseLinear(
        optimizer, "lr", milestones_values=[(5, 0.5), (15, 1.0), (25, 0.0), (35, 1.0), (40, 0.5)]
    )
    _test(lr_scheduler, optimizer)

    lr_scheduler = CosineAnnealingScheduler(optimizer, "lr", start_value=0.0, end_value=1.0, cycle_size=10)
    _test(lr_scheduler, optimizer)

    torch_lr_scheduler = ExponentialLR(optimizer, gamma=0.98)
    _test(LRScheduler(torch_lr_scheduler), optimizer)

    torch_lr_scheduler = StepLR(optimizer, step_size=50, gamma=0.5)
    _test(LRScheduler(torch_lr_scheduler), optimizer)
def __init__(self, net_path=None, **kwargs):
    super(TrackerSiamFC, self).__init__('SiamFC', True)
    self.cfg = self.parse_args(**kwargs)

    # setup GPU device if available
    self.cuda = torch.cuda.is_available()
    self.device = torch.device('cuda:0' if self.cuda else 'cpu')

    # setup model
    # Change 2: 0.64 is 0.8 * 0.8, because two crops are applied and each keeps 80%;
    # 1.0 is the ratio for the non-decayed weights -- no decay is needed at test time, so it is 1.0.
    self.net = Net(backbone=AlexNetV1(.64, 1.0),
                   head=SiamFC(self.cfg.out_scale))
    ops.init_weights(self.net)

    # load checkpoint if provided
    if net_path is not None:
        self.net.load_state_dict(
            torch.load(net_path, map_location=lambda storage, loc: storage))
    self.net = self.net.to(self.device)

    # convert to caffe model
    # sm = torch.jit.script(self.net)
    # sm.save("siamfc_model.pt")

    # setup criterion
    self.criterion = BalancedLoss()

    # setup optimizer
    self.optimizer = optim.SGD(self.net.parameters(),
                               lr=self.cfg.initial_lr,
                               weight_decay=self.cfg.weight_decay,
                               momentum=self.cfg.momentum)

    # apex initialization
    # opt_level = 'O1'
    # self.net, self.optimizer = amp.initialize(self.net, self.optimizer, opt_level=opt_level)

    # setup lr scheduler
    gamma = np.power(self.cfg.ultimate_lr / self.cfg.initial_lr,
                     1.0 / self.cfg.epoch_num)
    self.lr_scheduler = ExponentialLR(self.optimizer, gamma)
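# Illustrative check of the gamma formula used above (a hypothetical helper, not
# part of the tracker): with gamma = (ultimate_lr / initial_lr) ** (1 / epoch_num),
# ExponentialLR anneals the learning rate from initial_lr down to ultimate_lr
# after epoch_num calls to step().
def check_exponential_annealing(initial_lr=1e-2, ultimate_lr=1e-5, epoch_num=50):
    dummy = torch.nn.Parameter(torch.zeros(1))
    optimizer = optim.SGD([dummy], lr=initial_lr)
    gamma = np.power(ultimate_lr / initial_lr, 1.0 / epoch_num)
    scheduler = ExponentialLR(optimizer, gamma)
    for _ in range(epoch_num):
        optimizer.step()
        scheduler.step()
    # now optimizer.param_groups[0]['lr'] == initial_lr * gamma ** epoch_num
    #                                     == ultimate_lr (up to float rounding)
    return optimizer.param_groups[0]['lr']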
def init_episode(self):
    opt = self.opt
    if opt['model'] == 'linear':
        self.model = LR(opt)
    elif opt['model'] == 'fm':
        self.model = FM(opt)
    elif opt['model'] == 'deepfm':
        self.model = DeepFM(opt)
    elif opt['model'] == 'autoint':
        self.model = AutoInt(opt)
    else:
        raise ValueError("Invalid FM model type: {}".format(opt['model']))

    self._train_step_idx = 0

    if self.use_cuda:
        use_cuda(True, opt['device_id'])
        self.model.cuda()

    self.optimizer = use_optimizer(self.model, opt)
    self.scheduler = ExponentialLR(self.optimizer, gamma=opt['lr_exp_decay'])
def test_adagrad(self):
    self._test_basic_cases(
        lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1))
    self._test_basic_cases(
        lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1,
                                           initial_accumulator_value=0.1))
    self._test_basic_cases(
        lambda weight, bias: optim.Adagrad(
            self._build_params_dict(weight, bias, lr=1e-2), lr=1e-1))
    self._test_basic_cases(
        lambda weight, bias: optim.Adagrad(
            self._build_params_dict(weight, bias, lr=1e-2), lr=1e-1),
        [lambda opt: ReduceLROnPlateau(opt)])
    self._test_basic_cases(
        lambda weight, bias: optim.Adagrad(
            self._build_params_dict(weight, bias, lr=1e-2), lr=1e-1),
        [
            lambda opt: ReduceLROnPlateau(opt),
            lambda opt: ExponentialLR(opt, gamma=0.99),
        ])
    with self.assertRaisesRegex(ValueError, "Invalid lr_decay value: -0.5"):
        optim.Adagrad(None, lr=1e-2, lr_decay=-0.5)
def train(self, args):
    # TODO no need to use args, as args is assigned in __init__
    # build dataset
    train = TextDataSet(Conll.load(args.ftrain), self.fields_alias)
    train.build_loader(batch_size=args.batch_size, shuffle=True)
    dev = TextDataSet(Conll.load(args.fdev), self.fields_alias)
    dev.build_loader(batch_size=args.batch_size)
    test = TextDataSet(Conll.load(args.ftest), self.fields_alias)
    test.build_loader(batch_size=args.batch_size)

    self.criterion = nn.CrossEntropyLoss(reduction='mean')
    # Adam optimizer
    self.optimizer = Adam(self.parser_model.parameters(), args.lr)
    # learning rate decay:
    # new_lr = initial_lr * gamma**epoch = initial_lr * 0.75**(epoch/5000)
    self.scheduler = ExponentialLR(self.optimizer, args.decay ** (1 / args.decay_steps))

    total_time = timedelta()
    best_epoch, metric = 0, Metric()
    for epoch in range(1, args.epochs + 1):
        start_time = datetime.now()
        print('training epoch {} :'.format(epoch))
        loss, metric = self._train(args, train.data_loader)
        print('train loss: {}'.format(loss))
        accuracy = self.evaluate(args, dev.data_loader)
        print('dev accuracy: {}'.format(accuracy))
        time_diff = datetime.now() - start_time
        print('epoch time: {}'.format(time_diff))
        total_time += time_diff
        # if accuracy > best_accuracy:
        #     best_epoch = epoch
        # if epoch - best_epoch > args.patience:
        #     break

    accuracy = self.evaluate(args, test.data_loader)
    print('test accuracy: {}'.format(accuracy))
    print('total_time: {}'.format(total_time))
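# A small numeric illustration of the decay rule in the comment above, assuming
# torch plus the Adam / ExponentialLR imports used by the snippet; decay=0.75 and
# decay_steps=5000 mirror the comment and are not read from args. Stepping the
# scheduler k times yields lr * decay ** (k / decay_steps).
def check_per_step_decay(lr=2e-3, decay=0.75, decay_steps=5000, k=10000):
    dummy = torch.nn.Parameter(torch.zeros(1))
    optimizer = Adam([dummy], lr)
    scheduler = ExponentialLR(optimizer, decay ** (1 / decay_steps))
    for _ in range(k):
        optimizer.step()
        scheduler.step()
    expected = lr * decay ** (k / decay_steps)  # 2e-3 * 0.75**2 = 1.125e-3
    return optimizer.param_groups[0]['lr'], expected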
def setup_optim(self):
    params = add_weight_decay(self.model, self.train_config.weight_decay)
    self.optim = AdamW(
        params=params,
        lr=self.train_config.lr,
        weight_decay=self.train_config.weight_decay,
        betas=(self.train_config.beta1, self.train_config.beta2),
    )
    if self.train_config.scheduler_type == 'cosine':
        self.optim_schedule = CosineAnnealingLR(
            self.optim,
            T_max=self.train_config.scheduler_period,
            eta_min=self.train_config.lr_min,
        )
    elif self.train_config.scheduler_type == 'exponential':
        self.optim_schedule = ExponentialLR(
            self.optim,
            gamma=self.train_config.scheduler_gamma,
        )
    elif self.train_config.scheduler_type == 'step':
        self.optim_schedule = StepLR(
            self.optim,
            step_size=self.train_config.scheduler_period,
            gamma=self.train_config.scheduler_gamma,
        )
    elif self.train_config.scheduler_type == 'cyclic':
        self.optim = SGD(
            self.model.parameters(),
            lr=self.train_config.lr,
            weight_decay=self.train_config.weight_decay,
            momentum=0.9,
        )
        self.optim_schedule = CyclicLR(
            self.optim,
            base_lr=self.train_config.lr_min,
            max_lr=self.train_config.lr,
            step_size_up=self.train_config.scheduler_period,
            gamma=self.train_config.scheduler_gamma,
            mode='exp_range',
        )
def get_lr_scheduler(opts, optimizer):
    if opts.lr_schedule == "step":
        lr_scheduler = MultiStepLR(optimizer=optimizer,
                                   milestones=opts.lr_epoch_decay,
                                   gamma=opts.lr_decay)
    elif opts.lr_schedule == "cosine":
        lr_scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=opts.epoch)
    elif opts.lr_schedule == "exponential":
        lr_scheduler = ExponentialLR(optimizer=optimizer, gamma=opts.lr_decay)

    if opts.lr_scheduler_freq > 0:
        lr_scheduler = PeriodicLRDecorator(optimizer=optimizer,
                                           lr_scheduler=lr_scheduler,
                                           period=1. / opts.lr_scheduler_freq)
    if opts.warmup_epoch > 0:
        lr_scheduler = WarmUpLRDecorator(optimizer=optimizer,
                                         lr_scheduler=lr_scheduler,
                                         warmup_epoch=opts.warmup_epoch)
    return lr_scheduler
def __init__(self, opt):
    super(FMFactorizer, self).__init__(opt)
    self.opt = opt
    if opt['model'] == 'linear':
        self.model = LR(opt)
    elif opt['model'] == 'fm':
        self.model = FM(opt)
    elif opt['model'] == 'deepfm':
        self.model = DeepFM(opt)
    elif opt['model'] == 'autoint':
        self.model = AutoInt(opt)
    else:
        raise ValueError("Invalid FM model type: {}".format(opt['model']))

    if self.use_cuda:
        use_cuda(True, opt['device_id'])
        self.model.cuda()

    self.optimizer = use_optimizer(self.model, opt)
    self.scheduler = ExponentialLR(self.optimizer, gamma=opt['lr_exp_decay'])
def load_finder(name, parameters, *, lr_min, lr_max, num_epochs,
                nesterov=False, beta1=0.9, beta2=0.999):
    if name == "adam":
        optimizer = optim.Adam(parameters, lr=lr_min, betas=(beta1, beta2))
    elif name == "sgd":
        optimizer = optim.SGD(parameters, lr=lr_min, momentum=0.9, nesterov=nesterov)
    else:
        raise Exception("Optimizer {} not found".format(name))

    scheduler = ExponentialLR(optimizer, gamma=(lr_max / lr_min) ** (1 / num_epochs))
    return optimizer, scheduler
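# A minimal sketch of how load_finder could drive a learning-rate range test.
# `model_to_probe`, `train_one_epoch`, and `train_loader` are hypothetical stand-ins;
# stepping the scheduler once per epoch sweeps the lr geometrically from lr_min
# towards lr_max, so the loss curve can be inspected against the recorded lrs.
def run_lr_range_test(model_to_probe, train_one_epoch, train_loader, num_epochs=20):
    optimizer, scheduler = load_finder("adam", model_to_probe.parameters(),
                                       lr_min=1e-6, lr_max=1.0, num_epochs=num_epochs)
    lrs, losses = [], []
    for _ in range(num_epochs):
        lrs.append(optimizer.param_groups[0]["lr"])
        losses.append(train_one_epoch(model_to_probe, optimizer, train_loader))
        scheduler.step()  # multiply lr by (lr_max / lr_min) ** (1 / num_epochs)
    return lrs, losses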
def get_softmax_loss(self, cfg, num_classes):
    self.xent = MyCrossEntropy(num_classes=num_classes,
                               label_smooth=cfg.LOSS.IF_LABEL_SMOOTH,
                               learning_weight=cfg.LOSS.IF_LEARNING_WEIGHT)
    if cfg.MODEL.DEVICE == 'cuda':
        self.xent = self.xent.cuda()
    if self.xent.learning_weight:
        self.xent.optimizer = torch.optim.SGD(self.xent.parameters(),
                                              lr=0.0001,
                                              momentum=0.9,
                                              weight_decay=10 ** -4,
                                              nesterov=True)
        self.xent.scheduler = ExponentialLR(self.xent.optimizer, gamma=0.95, last_epoch=-1)

    def loss_function(data: Data):
        return cfg.LOSS.ID_LOSS_WEIGHT * self.xent(data.cls_score, data.cls_label)

    return loss_function
def configure_optimizers(self) -> Tuple[List[Optimizer], List[_LRScheduler]]:
    if self.hyperparams.optimizer == "Momentum":
        # using the same momentum value as in the original implementation by Alon
        optimizer = SGD(
            self.parameters(),
            self.hyperparams.learning_rate,
            momentum=0.95,
            nesterov=self.hyperparams.nesterov,
            weight_decay=self.hyperparams.weight_decay,
        )
    elif self.hyperparams.optimizer == "Adam":
        optimizer = Adam(self.parameters(),
                         self.hyperparams.learning_rate,
                         weight_decay=self.hyperparams.weight_decay)
    else:
        raise ValueError(
            f"Unknown optimizer name: {self.hyperparams.optimizer}, try one of: Adam, Momentum"
        )
    scheduler = ExponentialLR(optimizer, self.hyperparams.decay_gamma)
    return [optimizer], [scheduler]
def make_test_params(optimizer_class):
    cases = [
        (lambda weight, bias: optimizer_class([weight, bias], lr=1e-3),),
        (
            lambda weight, bias: optimizer_class(
                _build_params_dict(weight, bias, lr=1e-2), lr=1e-3
            ),
        ),
        (
            lambda weight, bias: optimizer_class(
                _build_params_dict_single(weight, bias, lr=1e-2), lr=1e-3
            ),
        ),
        (
            lambda weight, bias: optimizer_class(
                _build_params_dict_single(weight, bias, lr=1e-2)
            ),
        ),
        (
            lambda weight, bias: optimizer_class([weight, bias], lr=1e-3),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10)],
        ),
        (
            lambda weight, bias: optimizer_class([weight, bias], lr=1e-3),
            [
                lambda opt: StepLR(opt, gamma=0.9, step_size=10),
                lambda opt: ReduceLROnPlateau(opt),
            ],
        ),
        (
            lambda weight, bias: optimizer_class([weight, bias], lr=1e-3),
            [
                lambda opt: StepLR(opt, gamma=0.99, step_size=10),
                lambda opt: ExponentialLR(opt, gamma=0.99),
                lambda opt: ReduceLROnPlateau(opt),
            ],
        ),
    ]
    ids = ['%s_%s' % (optimizer_class.__name__, i) for i in range(len(cases))]
    return cases, ids
def get_scheduler(self, args: OptimizerParams) -> _LRScheduler:
    """
    Create the LR scheduler that will be used after warmup, based on the config params.
    """
    scheduler: _LRScheduler
    epochs_after_warmup = self.num_epochs - self.warmup_epochs
    if args.l_rate_scheduler == LRSchedulerType.Exponential:
        scheduler = ExponentialLR(optimizer=self.optimizer,
                                  gamma=args.l_rate_exponential_gamma,
                                  last_epoch=self.last_epoch)
    elif args.l_rate_scheduler == LRSchedulerType.Step:
        scheduler = StepLR(optimizer=self.optimizer,
                           step_size=args.l_rate_step_step_size,
                           gamma=args.l_rate_step_gamma,
                           last_epoch=self.last_epoch)
    elif args.l_rate_scheduler == LRSchedulerType.MultiStep:
        assert args.l_rate_multi_step_milestones is not None
        scheduler = MultiStepLR(optimizer=self.optimizer,
                                milestones=args.l_rate_multi_step_milestones,
                                gamma=args.l_rate_multi_step_gamma,
                                last_epoch=self.last_epoch)
    elif args.l_rate_scheduler == LRSchedulerType.Polynomial:
        polynomial_lr = PolynomialLR(gamma=args.l_rate_polynomial_gamma,
                                     l_rate=args.l_rate,
                                     min_l_rate=args.min_l_rate,
                                     epochs_after_warmup=epochs_after_warmup)
        scheduler = LambdaLR(optimizer=self.optimizer,
                             lr_lambda=polynomial_lr.get_lr,
                             last_epoch=self.last_epoch)
    elif args.l_rate_scheduler == LRSchedulerType.Cosine:
        scheduler = CosineAnnealingLR(optimizer=self.optimizer,
                                      T_max=epochs_after_warmup,
                                      eta_min=args.min_l_rate,
                                      last_epoch=self.last_epoch)
    else:
        raise ValueError("Unknown learning rate scheduler {}".format(args.l_rate_scheduler))
    return scheduler
def main(defaults=None, stdev=0.5):
    # default parameters
    if defaults is None:
        defaults = {
            'epochs': 5,
            'n_batch': 128,
            'lr': 1e-3,
            'gamma': 0.8,
            'n_it': 3750,
            'stdev': stdev,
            'tensorboard': False,
        }
    else:
        defaults = defaults | {'stdev': stdev}

    # create the tensorboard writer if asked
    if defaults["tensorboard"]:
        convnet.writer = SummaryWriter(args.writer)

    # load all the data
    dataLoad = getData(defaults['stdev'])

    # optimizer and scheduler
    optim = Adam(convnet.parameters(), lr=defaults['lr'])
    lr = ExponentialLR(optim, gamma=defaults['gamma'])

    # training phase
    convnet.fit(dataLoad, optim, lr, epochs=defaults['epochs'], w="Loss")
    free(optim, lr)

    if isinstance(convnet.writer, dict):
        # save the hyperparameters to the writer dict
        for key, value in defaults.items():
            convnet.writer[key] = str(value)
    elif isinstance(convnet.writer, SummaryWriter):
        # save the hyperparameters to tensorboard
        for key, value in defaults.items():
            convnet.writer.add_text(key, str(value))
        convnet.writer.close()
def get_lr_decay(args, optimizer, epoch):
    if optimizer:
        if args.decay_type is None:
            return None
        elif args.decay_type == 'exponential':
            if args.decay_rate < 1.0:
                print('Setting exponential lr decay: current={}, decay_rate={}'.format(
                    epoch, args.decay_rate))
                lr_scheduler = ExponentialLR(optimizer, args.decay_rate, last_epoch=epoch)
                return lr_scheduler
        elif args.decay_type == 'lambda':
            # Learning rate update schedulers
            # lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            #     optimizer, lr_lambda=LambdaLR(args.num_epochs, epoch, args.decay_epoch).step
            # )
            return None  # not handling right now
        else:
            raise Exception('unsupported lr decay type: {}'.format(args.decay_type))
    return None
def __init__(self, name='SiamFC', weight=None, device='cpu', **kargs):
    super(TrackerSiamFC, self).__init__(name=name, is_deterministic=True)
    self.cfg = self.parse_args(**kargs)

    # setup GPU device if available
    self.device = device

    # setup model
    self.net = SiamFC()
    if weight is not None:
        self.net.load_state_dict(torch.load(weight), strict=False)
    self.net = self.net.to(self.device)

    # setup optimizer
    self.optimizer = optim.SGD(self.net.parameters(),
                               lr=self.cfg.initial_lr,
                               weight_decay=self.cfg.weight_decay,
                               momentum=self.cfg.momentum)

    # setup lr scheduler
    self.lr_scheduler = ExponentialLR(self.optimizer, gamma=self.cfg.lr_decay)
def __init__(self, net_path=None, **kargs):
    super(DeepDual, self).__init__()
    self.cfg = self.parse_args(**kargs)
    self.CEloss = nn.CrossEntropyLoss()
    self.cuda = torch.cuda.is_available()
    self.device = torch.device('cuda')
    self.net = DeepDualNet()
    # if net_path is not None:
    #     self.net.load_state_dict(torch.load(
    #         net_path, map_location=lambda storage, loc: storage))
    # self.net = nn.DataParallel(self.net, device_ids=[0, 1]).cuda()
    self.net = nn.DataParallel(self.net).cuda()
    self.optimizer = optim.SGD(self.net.parameters(),
                               lr=self.cfg.initial_lr,
                               weight_decay=self.cfg.weight_decay,
                               momentum=self.cfg.momentum)
    # setup lr scheduler
    self.lr_scheduler = ExponentialLR(self.optimizer, gamma=self.cfg.lr_decay)
def train(self):
    logging.info('Start Training...')
    data_shoter = BiOneShotIterator(*self.__kg.train_data_iterator())
    optimizer, init_step, current_lr = self._get_optimizer()
    scheduler = ExponentialLR(optimizer=optimizer, gamma=config.decay_rate)
    max_mrr = 0.0
    training_logs = []

    # Training Loop
    for step in range(init_step, config.max_step + 1):
        log = LineaRE.train_step(self.__cal_model, optimizer, data_shoter.next())
        training_logs.append(log)

        # log
        if step % config.log_step == 0:
            metrics = {}
            for metric in training_logs[0].keys():
                metrics[metric] = sum([log[metric] for log in training_logs]) / len(training_logs)
            self._log_metrics('Training', step, metrics)
            training_logs.clear()

        # valid
        if step % config.valid_step == 0:
            logging.info('---------- Evaluating on Valid Dataset ----------')
            metrics = LineaRE.test_step(self.__cal_model, self.__kg.test_data_iterator(test='valid'), True)
            self._log_metrics('Valid', step, metrics)
            logging.info('-----------------------------------------------')
            if metrics[0]['MRR'] >= max_mrr:
                max_mrr = metrics[0]['MRR']
                save_variable_list = {
                    'step': step,
                    'current_lr': current_lr,
                }
                self._save_model(optimizer, save_variable_list)
                logging.info(f'Found a better model, it has been saved in \'{config.save_path}\'!')

        if step / config.max_step in [0.2, 0.5, 0.8]:
            scheduler.step()
            current_lr *= config.decay_rate
            logging.info(f'Changed learning_rate to {current_lr} at step {step}')

    logging.info('Training Finished!')
def get_center_loss(self, cfg, feat_dim, num_classes):
    self.center = CenterLoss(num_classes=num_classes,
                             feat_dim=feat_dim,
                             loss_weight=cfg.LOSS.CENTER_LOSS_WEIGHT,
                             learning_weight=False)
    self.center.optimizer = torch.optim.SGD(self.center.parameters(),
                                            lr=cfg.OPTIMIZER.LOSS_LR,
                                            momentum=0.9,
                                            weight_decay=10 ** -4,
                                            nesterov=True)
    self.center.scheduler = ExponentialLR(self.center.optimizer, gamma=0.995, last_epoch=-1)
    if cfg.MODEL.DEVICE == 'cuda':
        self.center = self.center.cuda()
        if cfg.APEX.IF_ON:
            self.center.to(torch.half)

    def loss_function(data: Data):
        return cfg.LOSS.CENTER_LOSS_WEIGHT * self.center(data.feat_t, data.cls_label)

    return loss_function