def __init__(self, policy_net, value_net):
    """Set up REINFORCE with target networks, Ranger optimizers and the
    default hyper-parameter / loss-log layout.

    Args:
        policy_net: actor network being trained.
        value_net: critic network being trained.
    """
    super(Reinforce, self).__init__()
    self.algorithm = update.reinforce_update

    # Frozen target copies used for stable value targets.
    # NOTE(review): the original comment claimed these were "for ddpg";
    # this class wires them into reinforce_update — verify against `update`.
    target_policy_net = copy.deepcopy(policy_net)
    target_value_net = copy.deepcopy(value_net)
    target_policy_net.eval()
    target_value_net.eval()

    # Hard copy of the weights (soft_tau=1.0 means a full overwrite).
    utils.soft_update(value_net, target_value_net, soft_tau=1.0)
    utils.soft_update(policy_net, target_policy_net, soft_tau=1.0)

    # One Ranger optimizer per trainable network.
    value_optimizer = optim.Ranger(value_net.parameters(),
                                   lr=1e-5, weight_decay=1e-2)
    policy_optimizer = optim.Ranger(policy_net.parameters(),
                                    lr=1e-5, weight_decay=1e-2)

    self.nets = {
        "value_net": value_net,
        "target_value_net": target_value_net,
        "policy_net": policy_net,
        "target_policy_net": target_policy_net,
    }
    self.optimizers = {
        "policy_optimizer": policy_optimizer,
        "value_optimizer": value_optimizer,
    }
    self.params = {
        "reinforce": ChooseREINFORCE(ChooseREINFORCE.basic_reinforce),
        "K": 10,
        "gamma": 0.99,
        "min_value": -10,
        "max_value": 10,
        "policy_step": 10,
        "soft_tau": 0.001,
    }
    self.loss_layout = {
        "test": {"value": [], "policy": [], "step": []},
        "train": {"value": [], "policy": [], "step": []},
    }
def __init__(self, policy_net, value_net):
    """Initialise TopK-corrected REINFORCE: Ranger optimizers plus the
    default hyper-parameter and loss-log layout."""
    super(Reinforce, self).__init__()
    self.algorithm = reinforce_update

    # One Ranger optimizer per network.
    policy_optimizer = optim.Ranger(policy_net.parameters(),
                                    lr=1e-5, weight_decay=1e-2)
    value_optimizer = optim.Ranger(value_net.parameters(),
                                   lr=1e-5, weight_decay=1e-2)

    self.nets = {
        "value_net": value_net,
        "policy_net": policy_net,
    }
    self.optimizers = {
        "policy_optimizer": policy_optimizer,
        "value_optimizer": value_optimizer,
    }
    self.params = {
        "reinforce": ChooseREINFORCE(
            ChooseREINFORCE.reinforce_with_TopK_correction),
        "K": 10,
        "gamma": 0.99,
        "min_value": -10,
        "max_value": 10,
        "policy_step": 10,
        "soft_tau": 0.001,
    }
    # Same empty log layout for both splits.
    self.loss_layout = {
        split: {"value": [], "policy": [], "step": []}
        for split in ("test", "train")
    }
def get_optimizer(hparams, models):
    """Build the optimizer named by ``hparams.optimizer`` over the
    parameters gathered from ``models``.

    Raises:
        ValueError: if the optimizer name is not one of
            'sgd', 'adam', 'radam', 'ranger'.
    """
    eps = 1e-8
    parameters = get_parameters(models)
    name = hparams.optimizer
    if name == 'sgd':
        return SGD(parameters, lr=hparams.lr, momentum=hparams.momentum,
                   weight_decay=hparams.weight_decay)
    if name == 'adam':
        return Adam(parameters, lr=hparams.lr, eps=eps,
                    weight_decay=hparams.weight_decay)
    if name == 'radam':
        return torch_optimizer.RAdam(parameters, lr=hparams.lr, eps=eps,
                                     weight_decay=hparams.weight_decay)
    if name == 'ranger':
        return torch_optimizer.Ranger(parameters, lr=hparams.lr, eps=eps,
                                      weight_decay=hparams.weight_decay)
    raise ValueError('optimizer not recognized!')
def configure_optimizers(self):
    """Create ``self.opt`` according to ``self.hp.optimizer`` plus a
    StepLR scheduler, and return the optimizer list (Lightning style).

    Raises:
        Exception: for an unrecognised optimizer name.
    """
    trainable = filter(lambda p: p.requires_grad, self.parameters())
    choice = self.hp.optimizer
    if choice == "sgd":
        self.opt = optim.SGD(trainable, lr=self.hparams.lr,
                             momentum=0.9, weight_decay=5e-4)
    elif choice == "adam":
        self.opt = optim.Adam(trainable, lr=self.hparams.lr,
                              weight_decay=5e-4)
    elif choice == "ranger":
        self.opt = optim_extra.Ranger(trainable, lr=self.hparams.lr,
                                      alpha=0.5, k=6, N_sma_threshhold=5,
                                      betas=(.95, 0.999), eps=1e-5,
                                      weight_decay=0)
    else:
        raise Exception("unknown optimizer")
    print(f"optimizer: {self.opt}")
    line()
    # Step-decay the learning rate every lr_anneal_step epochs.
    self.scheduler = optim.lr_scheduler.StepLR(
        self.opt,
        step_size=self.hp.lr_anneal_step,
        gamma=self.hp.lr_anneal_gamma)
    return [self.opt]
# Lower-case optimizer name -> class name inside the `optim`
# (torch_optimizer) package. SGD and Adam are special-cased below
# because they come from torch.optim instead.
_TORCH_OPTIMIZER_NAMES = {
    "yogi": "Yogi", "shampoo": "Shampoo", "swats": "SWATS",
    "sgdw": "SGDW", "sgdp": "SGDP", "rangerva": "RangerVA",
    "rangerqh": "RangerQH", "ranger": "Ranger", "radam": "RAdam",
    "qhm": "QHM", "qhadam": "QHAdam", "pid": "PID",
    "novograd": "NovoGrad", "lamb": "Lamb", "diffgrad": "DiffGrad",
    "apollo": "Apollo", "aggmo": "AggMo", "adamp": "AdamP",
    "adafactor": "Adafactor", "adamod": "AdaMod",
    "adabound": "AdaBound", "adabelief": "AdaBelief",
    "accsgd": "AccSGD", "a2graduni": "A2GradUni",
    "a2gradinc": "A2GradInc", "a2gradexp": "A2GradExp",
}


def get_optimizer(optimizer: str, model, optimizer_args):
    """Build an optimizer over ``model.parameters()``.

    Replaces a 30-branch elif chain with a lookup table; behavior is
    unchanged.

    Args:
        optimizer: lower-case optimizer name.
        model: module providing ``parameters()``.
        optimizer_args: keyword arguments forwarded to the constructor.

    Raises:
        Exception: when the name is not recognized.
    """
    if optimizer == "sgd":
        return torch.optim.SGD(model.parameters(), **optimizer_args)
    if optimizer == "adam":
        return torch.optim.Adam(model.parameters(), **optimizer_args)
    class_name = _TORCH_OPTIMIZER_NAMES.get(optimizer)
    if class_name is None:
        raise Exception(f"Optimizer '{optimizer}' does not exist!")
    return getattr(optim, class_name)(model.parameters(), **optimizer_args)
def configure_optimizers(self):
    """Build optimizer(s) and scheduler(s) for training.

    The primary optimizer is Ranger over the whole model. A second
    AdamP optimizer over ``self.params['submodel']`` is added when
    ``self.params['LR_2']`` is present (adversarial training), with an
    optional ExponentialLR scheduler gated on 'scheduler_gamma_2'.

    Returns:
        (optims, scheds) in the Lightning multi-optimizer format.
    """
    optims = []
    scheds = []

    if self.model.only_auxillary_training:
        print('Learning Rate changed for auxillary training')
        self.params['LR'] = 0.00001

    optimizer = optim_.Ranger(self.model.parameters(),
                              lr=self.params['LR'],
                              weight_decay=self.params['weight_decay'])
    optims.append(optimizer)

    # Optional second optimizer for adversarial training.
    # BUGFIX(review): was a bare `except: pass`, which also hid unrelated
    # errors; only a missing config key / submodel attribute is expected.
    try:
        if self.params['LR_2'] is not None:
            optimizer2 = optim_.AdamP(
                getattr(self.model, self.params['submodel']).parameters(),
                lr=self.params['LR_2'])
            optims.append(optimizer2)
    except (KeyError, AttributeError):
        pass

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optims[0], 'min', verbose=True,
        factor=self.params['scheduler_gamma'],
        min_lr=0.0001,
        patience=int(self.model.memory_leak_epochs / 7))
    scheds.append(
        {
            'scheduler': scheduler,
            'monitor': 'val_loss',  # Default: val_loss
            'interval': 'epoch',
            'frequency': 1,
        },
    )

    # Optional scheduler for the second optimizer.
    # BUGFIX(review): narrowed from a bare `except:`; IndexError covers the
    # case where optims[1] was never added above.
    try:
        if self.params['scheduler_gamma_2'] is not None:
            scheduler2 = optim.lr_scheduler.ExponentialLR(
                optims[1], gamma=self.params['scheduler_gamma_2'])
            scheds.append(scheduler2)
    except (KeyError, IndexError):
        pass

    print('USING WARMUP SCHEDULER')
    return optims, scheds
def make_optimizer(config_dict: Dict[str, Any], model: nn.Module):
    """Create an optimizer from a config dict.

    The dict must contain a 'name' key (case-insensitive); every other
    key is forwarded to the optimizer constructor.

    Raises:
        ValueError: for an unrecognised optimizer name.
    """
    kwargs: Dict[str, Any] = deepcopy(config_dict)
    name = kwargs.pop("name").lower()
    if name == "adam":
        return optim.Adam(model.parameters(), **kwargs)
    if name == "radam":
        return torch_optimizer.RAdam(model.parameters(), **kwargs)
    if name == "ranger":
        return torch_optimizer.Ranger(model.parameters(), **kwargs)
    if name == "sgd":
        return optim.SGD(model.parameters(), **kwargs)
    raise ValueError(name)
def get_optimizer(hparams, optimizer_grouped_parameters):
    """Select an optimizer by ``hparams.optimizer_type``.

    Supported names: 'ranger', 'qhadam', 'radam', 'adabound'; any other
    value falls back to torch's AdamW.
    """
    kind = hparams.optimizer_type
    if kind == "ranger":
        return torch_optimizer.Ranger(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            k=hparams.ranger_k,
            eps=hparams.adam_epsilon,
        )
    if kind == "qhadam":
        return torch_optimizer.QHAdam(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            nus=(0.1, 1.0),
            betas=(0.9, 0.999),
            eps=hparams.adam_epsilon,
        )
    if kind == "radam":
        return torch_optimizer.RAdam(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            betas=(0.9, 0.999),
            eps=hparams.adam_epsilon,
        )
    if kind == "adabound":
        return torch_optimizer.AdaBound(
            optimizer_grouped_parameters,
            lr=hparams.learning_rate,
            betas=(0.9, 0.999),
            final_lr=0.1,
            gamma=1e-3,
            eps=hparams.adam_epsilon,
            amsbound=False,
        )
    # Default: plain AdamW from torch.
    return torch.optim.AdamW(
        optimizer_grouped_parameters,
        lr=hparams.learning_rate,
        eps=hparams.adam_epsilon,
    )
def optimizer_chosen(self, model_param):
    """Return the optimizer named by ``self.config.OPTIMIZER``
    (case-insensitive) built over ``model_param`` with
    ``self.config.LEARNING_RATE``.

    Raises:
        Exception: wrapping the underlying error for an unknown name or
            a failed construction (original behavior preserved).
    """
    try:
        lr = self.config.LEARNING_RATE
        # Lazy factories: only the requested optimizer is instantiated.
        # (The original eagerly constructed all ten just to return one.)
        factories = {
            'sgd': lambda: optim.SGD(params=model_param, lr=lr,
                                     momentum=self.config.LEARNING_MOMENTUM,
                                     nesterov=True),
            'adam': lambda: optim.Adam(params=model_param, lr=lr),
            'adadelta': lambda: optim.Adadelta(params=model_param, lr=lr),
            'adagrad': lambda: optim.Adagrad(params=model_param, lr=lr),
            'adamax': lambda: optim.Adamax(params=model_param, lr=lr),
            'adamw': lambda: optim.AdamW(params=model_param, lr=lr),
            'asgd': lambda: optim.ASGD(params=model_param, lr=lr),
            'rmsprop': lambda: optim.RMSprop(params=model_param, lr=lr),
            'radam': lambda: torch_optimizer.RAdam(params=model_param, lr=lr),
            'ranger': lambda: torch_optimizer.Ranger(params=model_param, lr=lr),
        }
        return factories[self.config.OPTIMIZER.lower()]()
    except Exception as e:
        message = f"Invalid optimizers {e}"
        raise Exception(message)
def get_optimizer(model, optimizer_name, scheduler_name):
    """Build an (optimizer, scheduler) pair by name.

    Unknown optimizer names fall back to Ranger; unknown scheduler names
    fall back to CosineAnnealingLR. Uses the module-level
    ``learning_rate``.
    """
    params = model.parameters()
    if optimizer_name == 'Adam':
        optimizer = Adam(params, lr=learning_rate)
    elif optimizer_name == 'AdamW':
        optimizer = AdamW(params, lr=learning_rate)
    elif optimizer_name == 'AdamP':
        optimizer = AdamP(params, lr=learning_rate)
    elif optimizer_name == 'MADGRAD':
        optimizer = madgrad.MADGRAD(params, lr=learning_rate)
    else:
        optimizer = optim.Ranger(params, lr=learning_rate, alpha=0.6, k=10)

    # Lazy scheduler factories keyed by name; default is cosine annealing.
    scheduler_factory = {
        'step': lambda: StepLR(optimizer, 10, gamma=0.5),
        'reduce': lambda: ReduceLROnPlateau(optimizer, factor=0.1,
                                            patience=10),
    }
    fallback = lambda: CosineAnnealingLR(optimizer, T_max=2, eta_min=0.)
    scheduler = scheduler_factory.get(scheduler_name, fallback)()
    return optimizer, scheduler
def optimizer_chosen(self, model_param):
    """Return the optimizer named by ``self.config.OPTIMIZER``
    (case-insensitive) built over ``model_param`` with
    ``self.config.LEARNING_RATE``.

    On an unknown name, prints "Invalid optimizers" and returns None
    (the original best-effort behavior is preserved).
    """
    lr = self.config.LEARNING_RATE
    # Lazy factories: only the requested optimizer is instantiated.
    # (The original eagerly constructed all ten just to return one.)
    factories = {
        'sgd': lambda: optim.SGD(params=model_param, lr=lr,
                                 momentum=0.9, nesterov=True),
        'adam': lambda: optim.Adam(params=model_param, lr=lr),
        'adadelta': lambda: optim.Adadelta(params=model_param, lr=lr),
        'adagrad': lambda: optim.Adagrad(params=model_param, lr=lr),
        'adamax': lambda: optim.Adamax(params=model_param, lr=lr),
        'adamw': lambda: optim.AdamW(params=model_param, lr=lr),
        'asgd': lambda: optim.ASGD(params=model_param, lr=lr),
        'rmsprop': lambda: optim.RMSprop(params=model_param, lr=lr),
        'radam': lambda: torch_optimizer.RAdam(params=model_param, lr=lr),
        'ranger': lambda: torch_optimizer.Ranger(params=model_param, lr=lr),
    }
    try:
        return factories[self.config.OPTIMIZER.lower()]()
    except KeyError:
        print("Invalid optimizers")
def __init__(self, policy_net, value_net1, value_net2):
    """Initialise TD3: twin critics plus target copies of all three
    networks, Ranger optimizers and default hyper-parameters.

    Args:
        policy_net: actor network.
        value_net1: first critic.
        value_net2: second critic.
    """
    super(TD3, self).__init__()
    self.algorithm = update.td3_update

    # Target networks the TD3 algorithm needs for stable targets.
    target_policy_net = copy.deepcopy(policy_net)
    target_value_net1 = copy.deepcopy(value_net1)
    target_value_net2 = copy.deepcopy(value_net2)
    target_policy_net.eval()
    target_value_net1.eval()
    target_value_net2.eval()

    # Hard copy of the weights (soft_tau=1.0 means a full overwrite).
    utils.soft_update(value_net1, target_value_net1, soft_tau=1.0)
    utils.soft_update(value_net2, target_value_net2, soft_tau=1.0)
    utils.soft_update(policy_net, target_policy_net, soft_tau=1.0)

    # One Ranger optimizer per trainable network.
    value_optimizer1 = optim.Ranger(value_net1.parameters(),
                                    lr=1e-5, weight_decay=1e-2)
    value_optimizer2 = optim.Ranger(value_net2.parameters(),
                                    lr=1e-5, weight_decay=1e-2)
    policy_optimizer = optim.Ranger(policy_net.parameters(),
                                    lr=1e-5, weight_decay=1e-2)

    self.nets = {
        "value_net1": value_net1,
        "target_value_net1": target_value_net1,
        "value_net2": value_net2,
        "target_value_net2": target_value_net2,
        "policy_net": policy_net,
        "target_policy_net": target_policy_net,
    }
    self.optimizers = {
        "policy_optimizer": policy_optimizer,
        "value_optimizer1": value_optimizer1,
        "value_optimizer2": value_optimizer2,
    }
    self.params = {
        "gamma": 0.99,
        "noise_std": 0.5,
        "noise_clip": 3,
        "soft_tau": 0.001,
        "policy_update": 10,
        "policy_lr": 1e-5,
        "value_lr": 1e-5,
        "actor_weight_init": 25e-2,
        "critic_weight_init": 6e-1,
    }
    self.loss_layout = {
        "test": {"value1": [], "value2": [], "policy": [], "step": []},
        "train": {"value1": [], "value2": [], "policy": [], "step": []},
    }
def train_net(net, device, epochs=5, lr=0.01, batch_size=8, save_cp=True):
    """Train `net` with a Dice loss on the CSV datasets named by the
    module-level ``args``, logging to TensorBoard and checkpointing
    every epoch.

    Args:
        net: model exposing a ``Unet`` sub-module (n_channels/n_classes)
            and returning (mask_pred, weight_score) from forward.
        device: torch device to train on.
        epochs, lr, batch_size: training hyper-parameters.
        save_cp: save a checkpoint after each epoch when True.
    """
    net.to(device)
    train_dataset = BasicDataset(file_csv=args.train_csv, transform=train_transform)
    val_dataset = BasicDataset(file_csv=args.valid_csv, transform=train_transform)
    test_dataset = BasicDataset(file_csv=args.test_csv, transform=train_transform)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=0, pin_memory=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size,
                                shuffle=False, num_workers=8, pin_memory=True,
                                drop_last=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=8, pin_memory=True,
                                 drop_last=True)

    writer = SummaryWriter(comment="_{}".format(args.name))
    global_step = 0
    n_train = len(train_dataset)
    n_valid = len(val_dataset)
    logging.info(f'''Starting training:
        Epochs: {epochs}
        Batch size: {batch_size}
        Learning rate: {lr}
        Training size: {n_train}
        Validation size: {n_valid}
        Checkpoints: {save_cp}
        Device: {device}
    ''')

    optimizer = torch_optimizer.Ranger(net.parameters(), lr=lr,
                                       weight_decay=0.0005)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    loss_func = BinaryDiceLoss()

    for epoch in range(epochs):
        net.train()
        epoch_loss = 0
        with tqdm(total=n_train,
                  desc='Epoch {}/{}'.format(epoch + 1, epochs),
                  unit='img') as pbar:
            for batch in train_dataloader:
                imgs = batch['image']
                true_masks = batch['mask']
                # BUGFIX(review): the error message formatted
                # net.n_channels, which does not exist on this wrapper;
                # the checked attribute is net.Unet.n_channels.
                assert imgs.shape[1] == net.Unet.n_channels, \
                    'Network has been defined with {} input channels, '.format(
                        net.Unet.n_channels) + \
                    'but loaded images have {} channels. Please check that '.format(
                        imgs.shape[1]) + 'the images are loaded correctly.'

                imgs = imgs.to(device=device, dtype=torch.float32)
                mask_type = torch.float32 if net.Unet.n_classes == 1 else torch.long
                true_masks = true_masks.to(device=device, dtype=mask_type)

                mask_pred, weight_score = net(imgs, true_masks)
                loss = loss_func(predict=mask_pred, target=true_masks,
                                 score=weight_score)
                # BUGFIX(review): accumulate/log floats, not the
                # graph-attached tensor — `epoch_loss += loss` kept every
                # batch's autograd graph alive.
                epoch_loss += loss.item()
                writer.add_scalar('Loss/train', loss.item(),
                                  global_step=global_step)
                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_value_(net.parameters(), 0.1)
                optimizer.step()

                pbar.update(imgs.shape[0])
                global_step += 1

        for tag, value in net.named_parameters():
            tag = tag.replace('.', '/')
            writer.add_histogram('weights/' + tag, value.data.cpu().numpy(),
                                 global_step=global_step)

        val_score = eval_net_unet_pick(net, val_dataloader, device)
        # BUGFIX(review): CosineAnnealingLR.step() takes no metric;
        # passing val_score treated it as an epoch index and corrupted
        # the schedule (a metric argument belongs to ReduceLROnPlateau).
        scheduler.step()
        writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'],
                          global_step=global_step)

        if net.Unet.n_classes > 1:
            logging.info('Validation cross entropy: {}'.format(val_score))
            writer.add_scalar('Loss/valid', val_score,
                              global_step=global_step)
        else:
            logging.info('Validation cross entropy: {}'.format(val_score))
            writer.add_scalar('Dice/valid', val_score,
                              global_step=global_step)

        writer.add_images('images', imgs, global_step=global_step)
        if net.Unet.n_classes == 1:
            writer.add_images('masks/true', true_masks, global_step)
            writer.add_images('masks/pred',
                              torch.sigmoid(mask_pred) > 0.5, global_step)

        if save_cp:
            dir_checkpoint = os.path.join("checkpoints", args.name)
            if not os.path.exists(dir_checkpoint):
                # makedirs creates intermediate dirs too (mkdir failed when
                # the "checkpoints" parent did not yet exist).
                os.makedirs(dir_checkpoint)
                logging.info('Create checkpoint directory')
            torch.save(
                net.state_dict(),
                os.path.join(dir_checkpoint,
                             'CP_epoch{}.pth'.format(epoch + 1)))
            logging.info('Checkpoint {} saved!'.format(epoch + 1))

    test_score = eval_net_unet_pick(net, test_dataloader, device)
    logging.info('Test Dice Coeff: {}'.format(test_score))
    writer.add_scalar('Dice/test', test_score, global_step=global_step)
    writer.close()
ad = recnn.nn.models.AnomalyDetector().to(cuda) ad.load_state_dict(torch.load('models/anomaly.pt')) ad.eval() target_policy_net.eval() target_value_net.eval() soft_update(value_net, target_value_net, soft_tau=1.0) soft_update(policy_net, target_policy_net, soft_tau=1.0) value_criterion = nn.MSELoss() # from good to bad: Ranger Radam Adam RMSprop value_optimizer = optim.Ranger( value_net.parameters(), #####CAMBIATO RANGER CON RADAM lr=params['value_lr'], weight_decay=1e-2) policy_optimizer = optim.Ranger(policy_net.parameters(), lr=params['policy_lr'], weight_decay=1e-5) loss = { 'test': { 'value': [], 'policy': [], 'step': [] }, 'train': { 'value': [], 'policy': [], 'step': []
target = df_data[['user_id', 'age', 'gender']].copy(deep=True) del df_data gc.collect() kfold = KFold(n_splits=5, shuffle=True, random_state=seed) for fold_id, (train_index, val_index) in enumerate(kfold.split(all_index)): model = Model(embeddings=[ creative_id_embedding, ad_id_embedding, advertiser_id_embedding, product_id_embedding ], device=device).to(device).to(device) criterion_age = nn.CrossEntropyLoss().to(device) criterion_gender = nn.CrossEntropyLoss().to(device) optimizer = optim.Ranger(filter(lambda p: p.requires_grad, model.parameters()), lr=5e-4) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=1, min_lr=1e-12, verbose=True) early_stopping = EarlyStopping( file_name='../../models/age_m13_gender_m3_checkpoint{}.pt'.format( fold_id), patience=5, verbose=True, delta=0.00000001) model.set(criterion_age, criterion_gender, optimizer, scheduler,
# torch_optimizer classes constructed with (params, lr=...) only; the
# config name matches the class name exactly for each of these.
_SIMPLE_OPTIMIZERS = frozenset((
    'A2GradExp', 'A2GradInc', 'A2GradUni', 'AccSGD', 'AdaBelief',
    'AdaBound', 'AdaMod', 'Adafactor', 'AdamP', 'AggMo', 'Apollo',
    'DiffGrad', 'Lamb', 'NovoGrad', 'PID', 'QHAdam', 'QHM', 'RAdam',
    'Ranger', 'RangerQH', 'RangerVA', 'SGDP', 'SGDW', 'SWATS',
    'Shampoo', 'Yogi',
))


def build_optimizer(cfg, model):
    """Build the optimizer named by ``cfg.optimizer.type``.

    torch_optimizer classes receive only ``lr``; 'Lookahead' wraps a
    Yogi base; 'Adam'/'SGD' come from torch.optim and also honor
    ``weight_decay`` (and ``momentum`` for SGD). Replaces a 28-branch
    elif chain with a name table; behavior is unchanged.

    Raises:
        Exception: when the name is not recognized.
    """
    name = cfg.optimizer.type
    lr = cfg.optimizer.lr
    if name in _SIMPLE_OPTIMIZERS:
        return getattr(optim, name)(model.parameters(), lr=lr)
    if name == 'Lookahead':
        yogi = optim.Yogi(model.parameters(), lr=lr)
        return optim.Lookahead(yogi, k=5, alpha=0.5)
    if name == 'Adam':
        return torch.optim.Adam(model.parameters(), lr=lr,
                                weight_decay=cfg.optimizer.weight_decay)
    if name == 'SGD':
        return torch.optim.SGD(model.parameters(), lr=lr,
                               momentum=cfg.optimizer.momentum,
                               weight_decay=cfg.optimizer.weight_decay)
    raise Exception('optimizer is wrong')
def train_net(net, device, epochs=5, lr=0.01, batch_size=8, save_cp=True):
    """Train the quality-assessment head ``net.QAM`` as a classifier.

    Builds train/val/test loaders from the module-level ``args`` CSVs,
    optimizes with Ranger + cosine annealing, logs to TensorBoard and
    checkpoints every epoch.

    Args:
        net: model exposing ``Unet`` (n_channels/n_classes) and ``QAM``.
        device: torch device to train on.
        epochs, lr, batch_size: training hyper-parameters.
        save_cp: save a checkpoint after each epoch when True.
    """
    net.to(device)
    train_dataset = BasicDataset(file_csv=args.train_csv, transform=train_transform)
    val_dataset = BasicDataset(file_csv=args.valid_csv, transform=train_transform)
    test_dataset = BasicDataset(file_csv=args.test_csv, transform=train_transform)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=8, pin_memory=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size,
                                shuffle=False, num_workers=8, pin_memory=True,
                                drop_last=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=8, pin_memory=True,
                                 drop_last=True)

    writer = SummaryWriter(comment="_{}".format(args.name))
    global_step = 0
    n_train = len(train_dataset)
    n_valid = len(val_dataset)
    logging.info(
        f'''Starting training:
        Epochs: {epochs}
        Batch size: {batch_size}
        Learning rate: {lr}
        Training size: {n_train}
        Validation size: {n_valid}
        Checkpoints: {save_cp}
        Device: {device}
    '''
    )

    optimizer = torch_optimizer.Ranger(net.parameters(), lr=lr,
                                       weight_decay=0.0005)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    loss_func = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        net.train()
        epoch_loss = 0
        with tqdm(total=n_train,
                  desc='Epoch {}/{}'.format(epoch + 1, epochs),
                  unit='img') as pbar:
            for batch in train_dataloader:
                imgs = batch['image']
                true_masks = batch['mask']
                label = batch['label']
                # BUGFIX(review): the error message formatted
                # net.n_channels, which does not exist on this wrapper;
                # the checked attribute is net.Unet.n_channels.
                assert imgs.shape[1] == net.Unet.n_channels, \
                    'Network has been defined with {} input channels, '.format(
                        net.Unet.n_channels) + \
                    'but loaded images have {} channels. Please check that '.format(
                        imgs.shape[1]) + 'the images are loaded correctly.'

                imgs = imgs.to(device=device, dtype=torch.float32)
                mask_type = torch.float32 if net.Unet.n_classes == 1 else torch.long
                true_masks = true_masks.to(device=device, dtype=mask_type)
                label = label.to(device)

                score = net.QAM(imgs, true_masks)
                loss = loss_func(score, label)

                # BUGFIX(review): the original called loss.backward() first
                # and optimizer.zero_grad() immediately before
                # optimizer.step(), wiping the freshly computed gradients so
                # the step was a no-op. Correct order: zero_grad ->
                # backward -> step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # BUGFIX(review): accumulate a float, not the tensor, so
                # each batch's autograd graph is not kept alive.
                epoch_loss += loss.item()
                writer.add_scalar('Loss/train', loss.item(),
                                  global_step=global_step)
                pbar.set_postfix(**{'loss (batch)': loss.item()})
                pbar.update(imgs.shape[0])
                global_step += 1

        for tag, value in net.named_parameters():
            tag = tag.replace('.', '/')
            writer.add_histogram('weights/' + tag, value.data.cpu().numpy(),
                                 global_step=global_step)

        val_loss, val_acc = eval_net_cls(net, val_dataloader, device)
        # BUGFIX(review): CosineAnnealingLR.step() takes no metric;
        # passing val_loss treated it as an epoch index and corrupted
        # the schedule (a metric argument belongs to ReduceLROnPlateau).
        scheduler.step()
        writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'],
                          global_step=global_step)
        logging.info('Validation cross entropy: {}'.format(val_loss))
        writer.add_scalar('Loss/valid', val_loss, global_step=global_step)
        logging.info('Validation accuracy: {}'.format(val_acc))
        writer.add_scalar('Accuracy/valid', val_acc, global_step=global_step)

        if save_cp:
            dir_checkpoint = os.path.join(
                '/media/muyun99/DownloadResource/dataset/opends-Supervisely Person Dataset/checkpoints',
                args.name)
            if not os.path.exists(dir_checkpoint):
                # makedirs creates intermediate dirs too.
                os.makedirs(dir_checkpoint)
                logging.info('Create checkpoint directory')
            torch.save(net.state_dict(),
                       os.path.join(dir_checkpoint,
                                    'CP_epoch{}.pth'.format(epoch + 1)))
            logging.info('Checkpoint {} saved!'.format(epoch + 1))

    test_loss, test_acc = eval_net_cls(net, test_dataloader, device)
    logging.info('Test loss: {}'.format(test_loss))
    writer.add_scalar('Dice/test', test_loss, global_step=global_step)
    logging.info('Test accuracy: {}'.format(test_acc))
    writer.add_scalar('Accuracy/train', test_acc, global_step=global_step)
    writer.close()