# Log the current Epoch Number
writer.add_scalar('data/Epoch Number', epoch, total_iter_num)

###################### Training Cycle #############################
print('Train:')
print('=' * 10)

# Update Learning Rate Scheduler
if config.train.lrScheduler == 'StepLR':
    lr_scheduler.step()
elif config.train.lrScheduler == 'ReduceLROnPlateau':
    lr_scheduler.step(epoch_loss)
elif config.train.lrScheduler == 'lr_poly':
    if epoch % config.train.epochSize == config.train.epochSize - 1:
        lr_ = utils.lr_poly(config.train.optimSgd.learningRate,
                            epoch - START_EPOCH, END_EPOCH - START_EPOCH, 0.9)
        # optimizer = optim.SGD(net.parameters(), lr=lr_, momentum=p['momentum'], weight_decay=p['wd'])
        # rebuild the optimizer with the polynomially decayed learning rate
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=lr_,
            momentum=config.train.optimSgd.momentum,
            weight_decay=config.train.optimSgd.weight_decay)

model.train()

running_loss = 0.0
total_iou = 0.0
for iter_num, batch in enumerate(tqdm(trainLoader)):
    total_iter_num += 1
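# The 'lr_poly' branch above calls utils.lr_poly without defining it. Below is
# a minimal sketch of the usual polynomial-decay helper; the name and signature
# match the call sites in this file, but it is an assumption about the
# codebase, not its verified implementation.
def lr_poly(base_lr, curr_iter, max_iter, power=0.9):
    """Decay base_lr polynomially from its initial value toward 0 over max_iter iterations."""
    return base_lr * (1 - float(curr_iter) / max_iter) ** power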
def train(train_loader, model, optimizer, base_lrs, iter_stat, epoch, writer,
          model_old=None, adjust_lr=True):
    """Train for one epoch on the training set."""
    kl_weight = args.lwf
    batch_time = AverageMeter()
    losses = AverageMeter()
    losses_kl = AverageMeter()

    # backbone stays in eval mode (keeps BatchNorm statistics frozen)
    model.eval()

    # start timer
    end = time.time()

    # train for one epoch
    optimizer.zero_grad()
    epoch_size = len(train_loader)
    train_loader_iter = iter(train_loader)
    bar_format = '{desc}[{elapsed}<{remaining},{rate_fmt}]'
    pbar = tqdm(range(epoch_size), file=sys.stdout, bar_format=bar_format, ncols=80)
    for idx_iter in pbar:
        optimizer.zero_grad()
        if adjust_lr:
            lr = lr_poly(base_lrs[-1], iter_stat.iter_curr, iter_stat.iter_max, 0.9)
            writer.add_scalar("lr", lr, idx_iter + epoch * epoch_size)
            adjust_learning_rate(base_lrs, optimizer, iter_stat.iter_curr, iter_stat.iter_max, 0.9)

        input, label = next(train_loader_iter)
        label = label.cuda()
        input = input.cuda()

        # compute output on the new task
        output, features_new = model(input, output_features=['layer1', 'layer4'], task='new')
        loss = CrossEntropyLoss(output, label.long())

        # LwF KL-divergence distillation against the frozen old model
        if model_old is None:
            loss_kl = 0
        else:
            output_new = model.forward_fc(features_new['layer4'], task='old')
            output_old, features_old = model_old(input, output_features=['layer1', 'layer4'], task='old')
            loss_kl = KLDivLoss(F.log_softmax(output_new, dim=1), F.softmax(output_old, dim=1)).sum(-1)

        (loss + kl_weight * loss_kl).backward()

        # record losses
        losses.update(loss, input.size(0))
        losses_kl.update(loss_kl, input.size(0))

        # do SGD step
        optimizer.step()

        # increment iter number
        iter_stat.update()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        writer.add_scalar("loss/ce", losses.val, idx_iter + epoch * epoch_size)
        writer.add_scalar("loss/kl", losses_kl.val, idx_iter + epoch * epoch_size)
        writer.add_scalar("loss/total", losses.val + losses_kl.val, idx_iter + epoch * epoch_size)
        description = "[loss: %.3f][loss_kl: %.3f]" % (losses.val, losses_kl.val)
        pbar.set_description("[Step %d/%d]" % (idx_iter + 1, epoch_size) + description)
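# train() above depends on two helpers it never defines: AverageMeter and
# adjust_learning_rate. The sketches below follow the conventional
# PyTorch-examples AverageMeter API (.val / .avg / .update(val, n)) and apply
# the same polynomial decay to every parameter group; both are assumptions
# about the surrounding codebase, not its verified code.
class AverageMeter:
    """Track the most recent value and the running average of a metric."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def adjust_learning_rate(base_lrs, optimizer, curr_iter, max_iter, power=0.9):
    """Set each param group's lr to the polynomial decay of its base lr."""
    for base_lr, group in zip(base_lrs, optimizer.param_groups):
        group['lr'] = lr_poly(base_lr, curr_iter, max_iter, power)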
# Log the current Epoch Number
writer.add_scalar('data/Epoch Number', epoch, total_iter_num)

###################### Training Cycle #############################
print('Train:')
print('=' * 10)

# Update Learning Rate Scheduler
if config.train.lrScheduler == 'StepLR':
    lr_scheduler.step()
elif config.train.lrScheduler == 'ReduceLROnPlateau':
    lr_scheduler.step(epoch_loss)
elif config.train.lrScheduler == 'lr_poly':
    if epoch % config.train.epochSize == config.train.epochSize - 1:
        lr_ = utils.lr_poly(float(config.train.optimSgd.learningRate),
                            int(epoch - START_EPOCH),
                            int(END_EPOCH - START_EPOCH), 0.9)

        train_params = model.parameters()
        # per-layer lr groups for DRN backbones (assumes the configured model
        # name is stored at config.train.model)
        if config.train.model == 'drn':
            train_params = [{
                'params': model.get_1x_lr_params(),
                'lr': config.train.optimSgd.learningRate
            }, {
                'params': model.get_10x_lr_params(),
                'lr': config.train.optimSgd.learningRate * 10
            }]
        optimizer = torch.optim.SGD(
            train_params,
            lr=lr_,
            momentum=config.train.optimSgd.momentum,
            weight_decay=config.train.optimSgd.weight_decay)
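# For reference, the StepLR / ReduceLROnPlateau branches above assume
# schedulers constructed roughly as follows (a sketch only: the real step
# sizes, factors, and patience live in the project's config, so the values
# below are placeholders):
import torch.optim.lr_scheduler as lr_scheduler_mod

if config.train.lrScheduler == 'StepLR':
    lr_scheduler = lr_scheduler_mod.StepLR(optimizer, step_size=10, gamma=0.1)
elif config.train.lrScheduler == 'ReduceLROnPlateau':
    lr_scheduler = lr_scheduler_mod.ReduceLROnPlateau(optimizer, factor=0.1, patience=5)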
def train(args, train_loader, model, optimizer, base_lrs, iter_stat, epoch,
          logger, device, adjust_lr=True):
    """Train for one epoch on the training set."""
    tb_interval = 50
    csg_weight = args.csg
    losses = AverageMeter()
    losses_csg = [AverageMeter() for _ in range(len(model.stages))]  # one CSG loss meter per stage
    top1_csg = [AverageMeter() for _ in range(len(model.stages))]

    # backbone stays in eval mode; only the new fc head trains
    model.eval()
    model.encoder_q.fc_new.train()

    # train for one epoch
    optimizer.zero_grad()
    epoch_size = len(train_loader)
    train_loader_iter = iter(train_loader)
    bar_format = '{desc}[{elapsed}<{remaining},{rate_fmt}]'
    pbar = tqdm(range(epoch_size), file=sys.stdout, bar_format=bar_format, ncols=80)
    lr = lr_poly(base_lrs[-1], iter_stat.iter_curr, iter_stat.iter_max, 0.9)
    logger.log("lr %f" % lr)
    for idx_iter in pbar:
        optimizer.zero_grad()
        if adjust_lr:
            lr = lr_poly(base_lrs[-1], iter_stat.iter_curr, iter_stat.iter_max, 0.9)
            adjust_learning_rate(base_lrs, optimizer, iter_stat.iter_curr, iter_stat.iter_max, 0.9)

        sample = next(train_loader_iter)
        label = sample['label'].to(device)
        input = sample['data']
        input_q = input.to(device)
        # the key image exists only when augmentation produces a second view
        input_k = sample['img_k'].to(device) if args.augment else None

        # results keys: output, predictions_csg, targets_csg
        results = model(input_q, input_k)

        # synthetic-task cross-entropy
        loss = CrossEntropyLoss(results['output'], label.long())
        losses.update(loss, label.size(0))

        for idx in range(len(model.stages)):
            # predictions: cosine similarity between q and k; targets: zeros
            _loss = CrossEntropyLoss(results['predictions_csg'][idx], results['targets_csg'][idx])
            acc1, acc5 = accuracy_ranking(results['predictions_csg'][idx].data,
                                          results['targets_csg'][idx], topk=(1, 5))
            loss = loss + _loss * csg_weight
            if acc1 is not None:
                top1_csg[idx].update(acc1, label.size(0))
            losses_csg[idx].update(_loss, label.size(0))

        loss.backward()
        # do SGD step
        optimizer.step()
        # increment iter number
        iter_stat.update()

        if idx_iter % tb_interval == 0:
            logger.writer.add_scalar("loss/ce", losses.val, idx_iter + epoch * epoch_size)
        description = "[XE %.3f]" % losses.val
        description += "[CSG "
        loss_str = ""
        acc_str = ""
        for idx, stage in enumerate(model.stages):
            if idx_iter % tb_interval == 0:
                logger.writer.add_scalar("loss/layer%d" % stage, losses_csg[idx].val,
                                         idx_iter + epoch * epoch_size)
                logger.writer.add_scalar("prec/layer%d" % stage, top1_csg[idx].val[0],
                                         idx_iter + epoch * epoch_size)
            loss_str += "%.2f|" % losses_csg[idx].val
            acc_str += "%.1f|" % top1_csg[idx].val[0]
        description += "loss:%s ranking:%s]" % (loss_str[:-1], acc_str[:-1])
        if idx_iter % tb_interval == 0:
            logger.writer.add_scalar("loss/total",
                                     losses.val + sum(_l.val for _l in losses_csg),
                                     idx_iter + epoch * epoch_size)
        pbar.set_description("[Step %d/%d][%s]" % (idx_iter + 1, epoch_size, str(csg_weight)) + description)
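# The CSG stage losses above are cross-entropy over results['predictions_csg']
# with results['targets_csg'] all zeros, i.e. an InfoNCE-style objective where
# column 0 holds the positive pair. Below is a sketch of how such logits and
# targets are typically built from query/key features; csg_stage_logits is a
# hypothetical helper for illustration, since the real model assembles these
# tensors internally.
import torch
import torch.nn.functional as F

def csg_stage_logits(q, k, queue, temperature=0.07):
    """Cosine-similarity logits: positive pair at column 0, queue negatives after."""
    q = F.normalize(q, dim=1)                 # (N, C) query features
    k = F.normalize(k, dim=1)                 # (N, C) key features
    l_pos = (q * k).sum(dim=1, keepdim=True)  # (N, 1) cosine with the positive key
    l_neg = q @ queue                         # (N, K) cosine with queued negatives; queue is (C, K), L2-normalized
    logits = torch.cat([l_pos, l_neg], dim=1) / temperature
    targets = torch.zeros(logits.size(0), dtype=torch.long, device=logits.device)
    return logits, targets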