def step(args, split, epoch, loader, model, optimizer = None, M = None, f = None, tag = None):
    """Run one epoch of training, validation, or testing for the multi-view
    shape-consistency model.

    Returns (mpjpe.avg, losses.avg, shapeLosses.avg).

    NOTE(review): uses the pre-0.4 PyTorch API (`torch.autograd.Variable`,
    `loss.data[0]`); `f` and `tag` are only required when split == 'test'.
    """
    # Meters for the combined loss, joint position error, and relative error.
    losses, mpjpe, mpjpe_r = AverageMeter(), AverageMeter(), AverageMeter()
    # viewLosses and supLosses are created but never updated in this function.
    viewLosses, shapeLosses, supLosses = AverageMeter(), AverageMeter(), AverageMeter()
    if split == 'train':
        model.train()
    else:
        model.eval()
    bar = Bar('{}'.format(ref.category), max=len(loader))
    nViews = loader.dataset.nViews
    for i, (input, target, meta) in enumerate(loader):
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)
        output = model(input_var)
        # Combined supervised + unsupervised (cross-view) consistency loss.
        loss = ShapeConsistencyCriterion(nViews, supWeight = 1, unSupWeight = args.shapeWeight, M = M)(output, target_var, torch.autograd.Variable(meta))
        if split == 'test':
            # Dump per-sample images, predictions, ground truth and (optionally)
            # visibility flags to disk / the provided file handle `f`.
            for j in range(input.numpy().shape[0]):
                img = (input.numpy()[j] * 255).transpose(1, 2, 0).astype(np.uint8)
                cv2.imwrite('{}/img_{}/{}.png'.format(args.save_path, tag, i * input.numpy().shape[0] + j), img)
                gt = target.cpu().numpy()[j]
                pred = (output.data).cpu().numpy()[j]
                # meta columns after index 5 appear to hold visibility flags —
                # TODO confirm against the dataset's meta layout.
                vis = meta.cpu().numpy()[j][5:]
                # Predictions are stored flat (x, y, z per joint).
                for t in range(ref.J):
                    f.write('{} {} {} '.format(pred[t * 3], pred[t * 3 + 1], pred[t * 3 + 2]))
                f.write('\n')
                for t in range(ref.J):
                    f.write('{} {} {} '.format(gt[t, 0], gt[t, 1], gt[t, 2]))
                f.write('\n')
                if args.saveVis:
                    for t in range(ref.J):
                        f.write('{} 0 0 '.format(vis[t]))
                    f.write('\n')
        mpjpe_this = accuracy(output.data, target, meta)
        mpjpe_r_this = accuracy_dis(output.data, target, meta)
        shapeLoss = shapeConsistency(output.data, meta, nViews, M, split = split)
        losses.update(loss.data[0], input.size(0))
        shapeLosses.update(shapeLoss, input.size(0))
        mpjpe.update(mpjpe_this, input.size(0))
        mpjpe_r.update(mpjpe_r_this, input.size(0))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Bar.suffix = '{split:10}: [{0:2}][{1:3}/{2:3}] | Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | shapeLoss {shapeLoss.avg:.6f} | AE {mpjpe.avg:.6f} | ShapeDis {mpjpe_r.avg:.6f}'.format(epoch, i, len(loader), total=bar.elapsed_td, eta=bar.eta_td,
                                                                                                                                                                                                    loss=losses, mpjpe=mpjpe, split = split, shapeLoss = shapeLosses, mpjpe_r = mpjpe_r)
        bar.next()
    bar.finish()
    return mpjpe.avg, losses.avg, shapeLosses.avg
def train(self):
    """Train `self.net` with Adam + LambdaLR decay, logging loss/accuracy/lr
    curves to visdom and checkpointing the model after every epoch.

    Resumes from `self.epoch` and replays previously recorded curves via
    `self.read_loss_info()`.
    """
    criterion = nn.CrossEntropyLoss().to(self.device)
    optimizer = torch.optim.Adam(self.net.parameters(), lr=self.learning_rate)
    # The project's own LambdaLR helper supplies the per-epoch decay factor.
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, LambdaLR(self.num_epoch, self.epoch, self.decay_epoch).step)
    total_step = len(self.train_loader)
    losses = AverageMeter()
    accuracy = AverageMeter()
    # Restore previously logged curves so plots continue across restarts.
    accuracy_set, loss_set, lr_set, epoch_set = self.read_loss_info()
    loss_window = self.visdom.line(Y=[1])
    lr_window = self.visdom.line(Y=[1])
    accuracy_window = self.visdom.line(Y=[1])
    for epoch in range(self.epoch, self.num_epoch):
        losses.reset()
        # NOTE(review): `accuracy` is never reset here, so its average spans
        # all epochs while `losses` is per-epoch — confirm this is intended.
        for step, (images, labels) in enumerate(self.train_loader):
            images = images.to(self.device)
            labels = labels.to(self.device)
            outputs = self.net(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            # `predicted` is reused to hold the count of correct predictions.
            predicted = (predicted == labels).sum().item()
            losses.update(loss.item(), self.batch_size)
            accuracy.update(predicted / self.batch_size, self.batch_size)
            if step % 10 == 0:
                print(
                    f'Epoch [{epoch}/{self.num_epoch}], Step [{step}/{total_step}], Loss: {losses.avg:.4f}, '
                    f'Accuracy: {accuracy.avg:.4f}')
        # Append this epoch's stats and redraw all three visdom curves.
        accuracy_set += [accuracy.avg]
        loss_set += [losses.avg]
        lr_set += [optimizer.param_groups[0]['lr']]
        epoch_set += [epoch]
        loss_window = self.visdom.line(Y=loss_set, X=epoch_set, win=loss_window, update='replace')
        lr_window = self.visdom.line(Y=lr_set, X=epoch_set, win=lr_window, update='replace')
        accuracy_window = self.visdom.line(Y=accuracy_set, X=epoch_set, win=accuracy_window, update='replace')
        self.save_loss_info(accuracy_set, loss_set, lr_set, epoch_set)
        torch.save(self.net.state_dict(), '%s/vgg16-%d.pth' % (self.checkpoint_dir, epoch))
        lr_scheduler.step()
def train(args, train_loader, model, optimizer, ce_criterion, device, epoch, curr_lr, model_writer, global_step):
    """Train the pose-sequence model for one epoch.

    Optionally feeds MFCC and/or beat features to the model depending on
    `args.add_mfcc` / `args.add_beat`. Gradients are clipped to `args.clip`.
    Returns (global_step, total_losses.avg).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()
    # switch to train mode
    model.train()
    end = time.time()
    for i, (mask, pos_vec, pose3d_discrete_seq, pose3d_discrete_gt_seq, \
            mfcc_data, beat_data) in enumerate(train_loader):
        # BS X T X 48, BS X T X 48, BS X 1 X T, BS X 1 X T, BS X T X 48
        # NOTE(review): bs and timesteps are computed but never used below.
        bs = pose3d_discrete_seq.size()[0]
        timesteps = pose3d_discrete_seq.size()[1]
        # measure data loading time
        data_time.update(time.time() - end)
        # Send to device
        pose3d_discrete_seq = pose3d_discrete_seq.to(device)
        pose3d_discrete_gt_seq = pose3d_discrete_gt_seq.to(device)
        mask = mask.to(device)
        pos_vec = pos_vec.to(device)
        # Forward pass; audio side-inputs are optional and combined per flags.
        if args.add_mfcc and args.add_beat:
            mfcc_data_input = mfcc_data.to(device)
            beat_data_input = beat_data.to(device).long()
            pred_out = model(pose3d_discrete_seq, mask, pos_vec, \
                             mfcc_feats=mfcc_data_input, beat_feats=beat_data_input)  # BS X T X 48 X N_cls
        elif args.add_mfcc:
            mfcc_data_input = mfcc_data.to(device)
            pred_out = model(pose3d_discrete_seq, mask, pos_vec, mfcc_feats=mfcc_data_input)  # BS X T X 48 X N_cls
        elif args.add_beat:
            beat_data_input = beat_data.to(device).long()
            pred_out = model(pose3d_discrete_seq, mask, pos_vec, beat_feats=beat_data_input)  # BS X T X 48 X N_cls
        else:
            pred_out = model(pose3d_discrete_seq, mask, pos_vec)  # BS X T X 48 X N_cls
        # Masked cross-entropy reconstruction loss against the discrete GT.
        r_loss = ce_criterion(pred_out, pose3d_discrete_gt_seq, mask.squeeze(1).unsqueeze(2))
        total_loss = r_loss
        model_writer.add_scalar("Loss", np.array(total_loss.item()), global_step)
        total_losses.update(total_loss.item(), 1)
        optimizer.zero_grad()
        total_loss.backward()
        # Clip gradients to stabilize training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if (i % args.print_freq == 0):
            print("\n\n")
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Total Loss {total_loss.val:.4f} ({total_loss.avg:.4f})\n'
                  'lr {learning_rate:.6f}\t'
                  .format(epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, total_loss=total_losses, learning_rate=curr_lr))
        global_step += 1
    return global_step, total_losses.avg
def run_epoch(self, phase, epoch, data_loader):
    """Run one epoch in the given phase ('train' or eval).

    Returns (ret, results) where ret maps each loss-stat name to its epoch
    average plus the wall-clock 'time' in minutes, and results collects
    per-batch outputs when opt.test is set.
    """
    model_with_loss = self.model_with_loss
    if phase == 'train':
        model_with_loss.train()
    else:
        # In eval mode with DataParallel, unwrap to the underlying module.
        if len(self.opt.gpus) > 1:
            model_with_loss = self.model_with_loss.module
        model_with_loss.eval()
        torch.cuda.empty_cache()
    opt = self.opt
    results = {}
    data_time, batch_time = AverageMeter(), AverageMeter()
    avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
    # opt.num_iters < 0 means "use the full loader"; otherwise cap iterations.
    num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
    bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
    end = time.time()
    for iter_id, batch in enumerate(data_loader):
        if iter_id >= num_iters:
            break
        data_time.update(time.time() - end)
        # Move every tensor entry to the device; 'meta' stays on CPU.
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].to(device=opt.device, non_blocking=True)
        output, loss, loss_stats = model_with_loss(batch)
        loss = loss.mean()
        if phase == 'train':
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()
        Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
            epoch, iter_id, num_iters, phase=phase, total=bar.elapsed_td, eta=bar.eta_td)
        for l in avg_loss_stats:
            avg_loss_stats[l].update(loss_stats[l].mean().item(), batch['input'].size(0))
            Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
        if not opt.hide_data_time:
            Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
                                      '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        # Either print at a fixed interval or animate the progress bar.
        if opt.print_iter > 0:
            if iter_id % opt.print_iter == 0:
                print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug > 0:
            self.debug(batch, output, iter_id)
        if opt.test:
            self.save_result(output, batch, results)
        # Free batch tensors promptly to keep GPU memory flat.
        del output, loss, loss_stats
    bar.finish()
    ret = {k: v.avg for k, v in avg_loss_stats.items()}
    ret['time'] = bar.elapsed_td.total_seconds() / 60.
    return ret, results
def test(self):
    """Evaluate both classifier heads on the test set.

    The classifier's output is split in half: columns [:num_classes] are the
    source head (Fs) and [num_classes:] the target head (Ft). Depending on
    self.opt.EVAL_METRIC this reports plain accuracy ('accu') or per-class
    mean accuracy ('accu_mean'), appends a summary to log.txt, and returns
    (class_weight, best_accuracy).
    """
    self.feature_extractor.eval()
    self.classifier.eval()
    prec1_fs = AverageMeter()
    prec1_ft = AverageMeter()
    # Per-class sample counts and correct counts for each head.
    counter_all_fs = torch.FloatTensor(
        self.opt.DATASET.NUM_CLASSES).fill_(0)
    counter_all_ft = torch.FloatTensor(
        self.opt.DATASET.NUM_CLASSES).fill_(0)
    counter_acc_fs = torch.FloatTensor(
        self.opt.DATASET.NUM_CLASSES).fill_(0)
    counter_acc_ft = torch.FloatTensor(
        self.opt.DATASET.NUM_CLASSES).fill_(0)
    # Accumulates the target head's softmax mass per class (used as a
    # class-weight estimate, normalized at the end).
    class_weight = torch.zeros(self.num_classes)
    class_weight = class_weight.cuda()
    count = 0
    for i, (input, target) in enumerate(self.test_data['loader']):
        input, target = to_cuda(input), to_cuda(target)
        with torch.no_grad():
            feature_test = self.feature_extractor(input)
            output_test = self.classifier(feature_test)
        prob = F.softmax(output_test[:, self.num_classes:], dim=1)
        class_weight = class_weight + prob.data.sum(0)
        count = count + input.size(0)
        if self.opt.EVAL_METRIC == 'accu':
            prec1_fs_iter = accuracy(output_test[:, :self.num_classes], target)
            prec1_ft_iter = accuracy(output_test[:, self.num_classes:], target)
            prec1_fs.update(prec1_fs_iter, input.size(0))
            prec1_ft.update(prec1_ft_iter, input.size(0))
            if i % self.opt.PRINT_STEP == 0:
                print(" Test:epoch: %d:[%d/%d], AccFs: %3f, AccFt: %3f" % \
                      (self.epoch, i, len(self.test_data['loader']), prec1_fs.avg, prec1_ft.avg))
        elif self.opt.EVAL_METRIC == 'accu_mean':
            prec1_ft_iter = accuracy(output_test[:, self.num_classes:], target)
            prec1_ft.update(prec1_ft_iter, input.size(0))
            counter_all_fs, counter_acc_fs = accuracy_for_each_class(
                output_test[:, :self.num_classes], target, counter_all_fs, counter_acc_fs)
            counter_all_ft, counter_acc_ft = accuracy_for_each_class(
                output_test[:, self.num_classes:], target, counter_all_ft, counter_acc_ft)
            if i % self.opt.PRINT_STEP == 0:
                print(" Test:epoch: %d:[%d/%d], Task: %3f" % \
                      (self.epoch, i, len(self.test_data['loader']), prec1_ft.avg))
        else:
            raise NotImplementedError
    # NOTE(review): in 'accu' mode the counters stay zero, so these divisions
    # are 0/0 (nan); they are only consumed in the 'accu_mean' branch below.
    acc_for_each_class_fs = counter_acc_fs / counter_all_fs
    acc_for_each_class_ft = counter_acc_ft / counter_all_ft
    log = open(os.path.join(self.opt.SAVE_DIR, 'log.txt'), 'a')
    log.write("\n")
    # Normalize accumulated softmax mass to a per-class weight in (0, 1].
    class_weight = class_weight / count
    class_weight = class_weight / max(class_weight)
    if self.opt.EVAL_METRIC == 'accu':
        log.write(
            " Test:epoch: %d, AccFs: %3f, AccFt: %3f" % \
            (self.epoch, prec1_fs.avg, prec1_ft.avg))
        log.close()
        return class_weight, max(prec1_fs.avg, prec1_ft.avg)
    elif self.opt.EVAL_METRIC == 'accu_mean':
        log.write(
            " Test:epoch: %d, AccFs: %3f, AccFt: %3f" % \
            (self.epoch, acc_for_each_class_fs.mean(), acc_for_each_class_ft.mean()))
        log.write(
            "\nClass-wise Acc of Ft:")  ## based on the task classifier.
        for i in range(self.opt.DATASET.NUM_CLASSES):
            # Ordinal suffixes (1st / 2nd / 3rd / nth) for readability.
            if i == 0:
                log.write("%dst: %3f" % (i + 1, acc_for_each_class_ft[i]))
            elif i == 1:
                log.write(", %dnd: %3f" % (i + 1, acc_for_each_class_ft[i]))
            elif i == 2:
                log.write(", %drd: %3f" % (i + 1, acc_for_each_class_ft[i]))
            else:
                log.write(", %dth: %3f" % (i + 1, acc_for_each_class_ft[i]))
        log.close()
        return class_weight, max(acc_for_each_class_ft.mean(), acc_for_each_class_fs.mean())
def train(epoch, writer):
    """Train the instance-segmentation model for one epoch.

    Uses module-level `model`, `criterion`, `optimizer`, `train_dataset_it`,
    `cluster`, `visualizer`, and `args`. Optionally renders live predictions
    (args['display']) and logs image summaries to TensorBoard
    (args['tensorboard']). Returns the epoch's average loss.
    """
    # define meters
    loss_meter = AverageMeter()
    # put model into training mode
    model.train()
    # set this only when it is finetuning
    # for module in model.modules():
    #     if isinstance(module, torch.nn.modules.BatchNorm1d):
    #         module.eval()
    #     if isinstance(module, torch.nn.modules.BatchNorm2d):
    #         module.eval()
    #     if isinstance(module, torch.nn.modules.BatchNorm3d):
    #         module.eval()
    for param_group in optimizer.param_groups:
        print('learning rate: {}'.format(param_group['lr']))
    for i, sample in enumerate(tqdm(train_dataset_it)):
        im = sample['image']
        instances = sample['instance'].squeeze()
        class_labels = sample['label'].squeeze()
        output = model(im)
        loss = criterion(output, instances, class_labels, **args['loss_w'])
        loss = loss.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #output.detach().cpu()
        #torch.cuda.empty_cache()
        if args['display'] and i % args['display_it'] == 0:
            with torch.no_grad():
                visualizer.display(im[0], 'image')
                # Cluster embeddings with GT instances for a quick visual check.
                predictions = cluster.cluster_with_gt(
                    output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])
                visualizer.display([predictions.cpu(), instances[0].cpu()], 'pred')
                # Channel 2 of the output is sigma; min-max normalize and mask
                # out background pixels before displaying.
                sigma = output[0][2].cpu()
                sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
                sigma[instances[0] == 0] = 0
                visualizer.display(sigma, 'sigma')
                # Channel 3 holds the seed map (sigmoid-activated).
                seed = torch.sigmoid(output[0][3]).cpu()
                visualizer.display(seed, 'seed')
        loss_meter.update(loss.item())
        if args['tensorboard']:
            with torch.no_grad():
                # Channels 0-1: offset vectors rendered as a flow color map;
                # channels 3-11: seed maps (one per class).
                color_map = draw_flow(torch.tanh(output[0][0:2]))
                seed = torch.sigmoid(output[0][3:11]).cpu()
                sigma = output[0][2].cpu()
                sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min())
                sigma[instances[0] == 0] = 0
                #predictions = cluster.cluster_with_gt(output[0], instances[0], n_sigma=args['loss_opts']['n_sigma'])
                # HWC -> CHW for add_image.
                color_map = color_map.transpose(2, 0, 1)
                seed_visual = seed.unsqueeze(1)
                seed_show = vutils.make_grid(seed_visual, nrow=8, normalize=True, scale_each=True)
                writer.add_image('Input', im[0], epoch)
                writer.add_image('InstanceGT', instances[0].unsqueeze(0).cpu().numpy(), epoch)
                writer.add_image('ColorMap', color_map, epoch)
                writer.add_image('SeedMap', seed_show, epoch)
                writer.add_image('SigmaMap', sigma.unsqueeze(0).cpu().numpy(), epoch)
                #writer.add_image('Prediction', predictions.unsqueeze(0).cpu().numpy(), epoch)
    return loss_meter.avg
def run_epoch(self, phase, epoch, data_loader, logger=None):
    """Run one epoch in the given phase ('train' or eval).

    Like the plain trainer's run_epoch, but additionally:
      * saves reconstructed images when self.reconstruct_img is set;
      * tolerates a possibly-None 'KL_loss' stat;
      * writes per-iteration summaries through `logger` every
        opt.logger_iteration iterations.

    Returns (ret, results): ret maps each loss-stat name to its epoch average
    plus wall-clock 'time' in minutes; results collects per-batch outputs
    when opt.test is set.
    """
    model_with_loss = self.model_with_loss
    if phase == 'train':
        model_with_loss.train()
    else:
        # In eval mode with DataParallel, unwrap to the underlying module.
        if len(self.opt.gpus) > 1:
            model_with_loss = self.model_with_loss.module
        model_with_loss.eval()
        torch.cuda.empty_cache()
    opt = self.opt
    results = {}
    data_time, batch_time = AverageMeter(), AverageMeter()
    avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
    num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
    bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
    end = time.time()
    for iter_id, batch in enumerate(data_loader):
        if iter_id >= num_iters:
            break
        data_time.update(time.time() - end)
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].to(device=opt.device, non_blocking=True)
        output, loss, loss_stats = model_with_loss(batch)
        if self.reconstruct_img:
            # Debug dump of the reconstruction branch's output images.
            file_path = '/data/mry/code/CenterNet/debug_conflict_bt_class_recon'
            self.save_tensor_to_img(output['reconstruct_img'], batch['meta']['file_name'], file_path)
        loss = loss.mean()
        if phase == 'train':
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()
        Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
            epoch, iter_id, num_iters, phase=phase, total=bar.elapsed_td, eta=bar.eta_td)
        for l in avg_loss_stats:
            if l == 'KL_loss':
                # KL_loss may legitimately be None (e.g. branch disabled);
                # record 0 in that case so the meter keeps a full count.
                if loss_stats[l] is not None:
                    avg_loss_stats[l].update(loss_stats[l].mean().item(), batch['input'].size(0))
                    # BUGFIX: original formatted the AverageMeter object with
                    # ':.4f' (missing .avg), which raises TypeError.
                    Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
                else:
                    avg_loss_stats[l].update(0, batch['input'].size(0))
                continue
            avg_loss_stats[l].update(
                loss_stats[l].mean().item(), batch['input'].size(0))
            Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
        if not opt.hide_data_time:
            Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
                                      '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        if logger and iter_id % opt.logger_iteration == 0:
            logger.write_iteration(
                '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
                    epoch, iter_id, num_iters, phase=phase, total=bar.elapsed_td, eta=bar.eta_td))
            for l in avg_loss_stats:
                if loss_stats[l] is None:
                    continue
                # BUGFIX: original called avg_loss_stats[l].update(...) again
                # here, double-counting every stat on logged iterations; the
                # meters were already updated above, so only report them.
                logger.write_iteration('|{} {:.4f} '.format(l, avg_loss_stats[l].avg))
                logger.scalar_summary('train_iteration_{}'.format(l),
                                      avg_loss_stats[l].avg, (epoch - 1) * num_iters + iter_id)
            logger.write_iteration('\n')
        # Either print at a fixed interval or animate the progress bar.
        if opt.print_iter > 0:
            if iter_id % opt.print_iter == 0:
                print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug > 0:
            self.debug(batch, output, iter_id)
        if opt.test:
            self.save_result(output, batch, results)
        # Free batch tensors promptly to keep GPU memory flat.
        del output, loss, loss_stats
    bar.finish()
    ret = {k: v.avg for k, v in avg_loss_stats.items()}
    ret['time'] = bar.elapsed_td.total_seconds() / 60.
    return ret, results
def train_learner(self, x_train, y_train):
    """Train on one task's data while accumulating a running Fisher
    information estimate (EWC-style continual learning).

    After training, snapshots the current parameters into self.prev_params
    and min-max normalizes the running Fisher into self.normalized_fisher.
    """
    self.before_train(x_train, y_train)
    # set up loader
    train_dataset = dataset_transform(
        x_train, y_train, transform=transforms_match[self.data])
    train_loader = data.DataLoader(train_dataset, batch_size=self.batch,
                                   shuffle=True, num_workers=0, drop_last=True)
    # setup tracker
    losses_batch = AverageMeter()
    acc_batch = AverageMeter()
    # set up model
    self.model.train()
    for ep in range(self.epoch):
        for i, batch_data in enumerate(train_loader):
            # batch update
            batch_x, batch_y = batch_data
            batch_x = maybe_cuda(batch_x, self.cuda)
            batch_y = maybe_cuda(batch_y, self.cuda)
            # update the running fisher every `fisher_update_after` steps
            # (counted across epochs).
            if (ep * len(train_loader) + i + 1) % self.fisher_update_after == 0:
                self.update_running_fisher()
            out = self.forward(batch_x)
            loss = self.total_loss(out, batch_y)
            # Knowledge-distillation tricks blend the task loss with a KD
            # loss, weighted by how many tasks have been seen.
            if self.params.trick['kd_trick']:
                loss = 1 / (self.task_seen + 1) * loss + (1 - 1 / (self.task_seen + 1)) * \
                       self.kd_manager.get_kd_loss(out, batch_x)
            if self.params.trick['kd_trick_star']:
                loss = 1 / ((self.task_seen + 1) ** 0.5) * loss + \
                       (1 - 1 / ((self.task_seen + 1) ** 0.5)) * self.kd_manager.get_kd_loss(out, batch_x)
            # update tracker
            losses_batch.update(loss.item(), batch_y.size(0))
            _, pred_label = torch.max(out, 1)
            acc = (pred_label == batch_y).sum().item() / batch_y.size(0)
            acc_batch.update(acc, batch_y.size(0))
            # backward
            self.opt.zero_grad()
            loss.backward()
            # accumulate the fisher of current batch (uses the gradients just
            # computed, before the optimizer consumes them)
            self.accum_fisher()
            self.opt.step()
            if i % 100 == 1 and self.verbose:
                # NOTE(review): .avg() is called as a method here, while other
                # meters in this file read .avg as an attribute — confirm this
                # file's AverageMeter implements avg() as a method.
                print('==>>> it: {}, avg. loss: {:.6f}, '
                      'running train acc: {:.3f}'.format(
                          i, losses_batch.avg(), acc_batch.avg()))
    # save params for current task
    for n, p in self.weights.items():
        self.prev_params[n] = p.clone().detach()
    # update normalized fisher of current task (min-max over all tensors;
    # 1e-32 guards against division by zero when max == min)
    max_fisher = max([torch.max(m) for m in self.running_fisher.values()])
    min_fisher = min([torch.min(m) for m in self.running_fisher.values()])
    for n, p in self.running_fisher.items():
        self.normalized_fisher[n] = (p - min_fisher) / (max_fisher - min_fisher + 1e-32)
    self.after_train()
model.train() for data in train_loader: origin_x, origin_y = data[0].cuda(), data[1].cuda() positive_x, positive_y = data[2].cuda(), data[3].cuda() negative_x, negative_y = data[4].cuda(), data[5].cuda() origin_x_feat, origin_y_hat = model(origin_x) positive_x_feat, positive_y_hat = model(positive_x) negative_x_feat, negative_y_hat = model(negative_x) # loss_tp = torch.FloatTensor([0.]).cuda() loss_tp = criterion_TP (origin_x_feat, positive_x_feat, negative_x_feat) loss_ce = (criterion_CE (origin_y_hat, origin_y) + criterion_CE (positive_y_hat, origin_y) + criterion_CE ( negative_y_hat, negative_y)) / 3 acc_1, acc_3 = accuracy(origin_y_hat, origin_y, topk=(1, 3)) avg_acc.update(acc_1.item(), data[0].shape[0]) avg_triplet.update(loss_tp.item(), data[0].shape[0]) avg_ce.update(loss_ce.item(), data[0].shape[0]) loss = loss_tp + loss_ce optimizer.zero_grad() loss.backward() optimizer.step() scheduler.step() y_hat_list = [] y_list = [] model.eval() with torch.no_grad():
def inference():
    """Run ACNet semantic-segmentation inference on the SUN RGB-D validation
    set and print per-class IoU, mean IoU, mean accuracy, and pixel accuracy.

    Uses module-level `args` and `device`; batch size is fixed to 1.
    """
    model = ACNet_models_V1.ACNet(num_class=40, pretrained=False)
    load_ckpt(model, None, args.last_ckpt, device)
    model.eval()
    model.to(device)
    val_data = ACNet_data.SUNRGBD(transform=torchvision.transforms.Compose(
        [scaleNorm(), ToTensor(), Normalize()]), phase_train=False, data_dir=args.data_dir)
    val_loader = DataLoader(val_data, batch_size=1, shuffle=False, num_workers=1, pin_memory=True)
    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    a_meter = AverageMeter()
    b_meter = AverageMeter()
    with torch.no_grad():
        for batch_idx, sample in enumerate(val_loader):
            # TODO: batch=1; inspect sample's sizes here to decide how to pack
            # image/depth/label — probably needs a for loop. (translated)
            origin_image = sample['origin_image'].numpy()
            origin_depth = sample['origin_depth'].numpy()
            image = sample['image'].to(device)
            depth = sample['depth'].to(device)
            label = sample['label'].numpy()
            with torch.no_grad():  # NOTE(review): redundant — already inside the outer no_grad
                pred = model(image, depth)
            # argmax over classes; +1 shifts predictions to 1-based labels.
            output = torch.max(pred, 1)[1] + 1
            output = output.squeeze(0).cpu().numpy()
            acc, pix = accuracy(output, label)
            intersection, union = intersectionAndUnion(output, label, args.num_class)
            acc_meter.update(acc, pix)
            a_m, b_m = macc(output, label, args.num_class)
            intersection_meter.update(intersection)
            union_meter.update(union)
            a_meter.update(a_m)
            b_meter.update(b_m)
            print('[{}] iter {}, accuracy: {}'.format(
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), batch_idx, acc))
            # img = image.cpu().numpy()
            # print('origin iamge: ', type(origin_image))
            if args.visualize:
                # -1 shifts back to 0-based labels for visualization.
                visualize_result(origin_image, origin_depth, label - 1, output - 1, batch_idx, args)
    # 1e-10 guards against empty-class division by zero.
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [{}], IoU: {}'.format(i, _iou))
    mAcc = (a_meter.average() / (b_meter.average() + 1e-10))
    print(mAcc.mean())
    print('[Eval Summary]:')
    print('Mean IoU: {:.4}, Accuracy: {:.2f}%'.format(
        iou.mean(), acc_meter.average() * 100))
def train(self):
    """Contrastive-style training loop: AdamW + cosine annealing with linear
    warmup, optional apex fp16, periodic validation with best-model
    checkpointing, and TensorBoard scalar logging.
    """
    #load data loader
    train_loader, valid_loader = self.dataset.get_train_validation_data_loaders()
    #define optimier
    # NOTE(review): weight_decay is eval()'d from config — assumes the config
    # string is trusted (e.g. "1e-6"); verify the config source.
    optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, self.model.parameters()),
                                  self.train_config['lr'],
                                  weight_decay=eval(self.train_config['weight_decay']))
    n_steps = self.train_config["epochs"] * len(train_loader)
    #learning rate schudler
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_steps)
    warmup_scheduler = warmup.UntunedLinearWarmup(optimizer)
    # NOTE(review): this gate reads self.config['fp16_precision'] but the
    # backward path below reads self.train_config['fp16_precision'] — confirm
    # both keys exist and agree, otherwise amp may be half-initialized.
    if apex_support and self.config['fp16_precision']:
        self.model, optimizer = amp.initialize(self.model, optimizer,
                                               opt_level='O2', keep_batchnorm_fp32=True)
    model_checkpoints_folder = os.path.join(self.writer.log_dir, 'checkpoints')
    # save config file
    save_config_file(model_checkpoints_folder)
    logger.info("***** Running training *****")
    logger.info(" Total optimization steps = %d", n_steps)
    n_iter = 0
    valid_n_iter = 0
    best_valid_loss = np.inf
    losses = AverageMeter()
    for epoch_counter in range(self.train_config['epochs']):
        self.model.train()
        # self.model.apply(set_bn_eval)
        epoch_iterator = tqdm(train_loader,
                              desc="Training (X / X Steps) (loss=X.X)",
                              bar_format="{l_bar}{r_bar}",
                              dynamic_ncols=True)
        for [xis, xjs], labels in epoch_iterator:
            optimizer.zero_grad()
            loss = self._step(xis, xjs, labels)
            losses.update(loss.item(), self.config["batch_size"])
            if n_iter % self.train_config['log_every_n_steps'] == 0:
                self.writer.add_scalar('train_loss', loss, global_step=n_iter)
            if apex_support and self.train_config['fp16_precision']:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
            n_iter += 1
            epoch_iterator.set_description(
                "Training (%d / %d Epochs) (loss=%2.5f)" %
                (epoch_counter, self.train_config['epochs'], losses.val))
        # warmup for the first 10 epochs
        # NOTE(review): passing an epoch to scheduler.step() is deprecated in
        # recent PyTorch — confirm the pinned torch version accepts it.
        scheduler.step(scheduler.last_epoch + 1)
        warmup_scheduler.dampen()
        # validate the model if requested
        if epoch_counter % self.train_config['eval_every_n_epochs'] == 0:
            valid_loss = self._validate(valid_loader)
            if valid_loss < best_valid_loss:
                # save the model weights
                best_valid_loss = valid_loss
                torch.save(
                    self.model.state_dict(),
                    os.path.join(model_checkpoints_folder, 'model.pth'))
            self.writer.add_scalar('validation_loss', valid_loss, global_step=valid_n_iter)
            valid_n_iter += 1
        self.writer.add_scalar('cosine_lr_decay', scheduler.get_lr()[0], global_step=n_iter)
def predict(args, predict_data_loader, model, result_path):
    """Run segmentation inference over `predict_data_loader` and optionally
    save visualizations (args.vis) and raw label volumes (args.seg).

    Each loader item is a single case (batch size 1): a stack of slice images
    plus the case's person name and picture names.
    """
    model.eval()
    batch_time = AverageMeter()
    end = time.time()
    # Avoid shadowing the builtin `iter` (original used `iter` as loop var).
    for it, (image, person_name, picname, imt) in enumerate(predict_data_loader):
        # batchsize = 1, so squeeze dim 1
        image = image.squeeze()
        person_name = person_name[0]
        #print(image_name)
        with torch.no_grad():
            # Sub-batch the slice stack to bound GPU memory.
            batch = 1
            pred_seg = torch.zeros(image.shape[0], image.shape[2], image.shape[3])
            #pred_cls = torch.zeros(image.shape[0], 3)
            for i in range(0, image.shape[0], batch):
                start_id = i
                end_id = min(i + batch, image.shape[0])
                image_batch = image[start_id:end_id, :, :, :]
                image_var = Variable(image_batch).cuda()
                # model forward
                output_seg = model(image_var)
                _, pred_batch = torch.max(output_seg, 1)
                pred_seg[start_id:end_id, :, :] = pred_batch.cpu().data
                #pred_cls[start_id:end_id, :] = output_cls.cpu().data
        pred_seg = pred_seg.numpy().astype('uint8')  # predicted label volume
        #pred_det = pred_cls.numpy().astype('float32')
        if args.vis:
            imt = (imt.squeeze().numpy()).astype('uint8')
            #ant = label.numpy().astype('uint8')
            save_dir = osp.join(result_path, 'vis', person_name)
            if not exists(save_dir):
                os.makedirs(save_dir)
            vis_predict(imt, pred_seg, pred_seg, save_dir, picname)
            print('save vis, finished!')
        batch_time.update(time.time() - end)
        # save seg result
        if args.seg:
            save_dir = osp.join(result_path, 'segment')
            if not exists(save_dir):
                os.makedirs(save_dir)
            # BUGFIX: original referenced undefined `image_name` (NameError
            # whenever args.seg is set); person_name is the per-case
            # identifier in scope — confirm the expected filename convention.
            np.save(osp.join(save_dir, person_name + '_labelMark_volumes'), pred_seg)
            print('save segment result, finished!')
        # save cls result
        #if args.det:
        #    save_dir = osp.join(result_path, 'segment')
        #    if not exists(save_dir):
        #        os.makedirs(save_dir)
        #    np.save(osp.join(save_dir, person_name + '_labelMark_detections'), pred_det)
        #    print('save detection result, finished!')
        end = time.time()
        logger_vis.info(
            'Eval: [{0}/{1}]\t'
            'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                it, len(predict_data_loader), batch_time=batch_time))
def evaluate(models, val_loader, interp, criterion, history, epoch, args):
    """Evaluate the segmentation model on the validation set, append loss and
    accuracy to `history['val']`, and re-plot the loss/accuracy curves.

    NOTE(review): the forward passes run without torch.no_grad(), so autograd
    state is kept during evaluation — confirm whether that is intentional.
    """
    print('***Evaluating at {} epoch ...'.format(epoch))
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    models.eval()
    for i, batch_data in enumerate(val_loader):
        torch.cuda.synchronize()
        # forward pass
        images, labels, _ = batch_data
        images = images.to(device)
        labels = labels.to(device)
        pred_seg = models(images)
        # Upsample logits to label resolution before computing the loss.
        pred_seg = interp(pred_seg)
        # pred_seg = F.softmax(pred_seg)
        loss = criterion(pred_seg, labels)
        loss_meter.update(loss.data.item())
        print('[Eval] iter {}, loss: {}'.format(i, loss.data.item()))
        #acc = pixel_acc(pred_seg, labels)
        #acc_meter.update(acc.data.item())
        labels = as_numpy(labels)
        _, pred = torch.max(pred_seg, dim=1)
        pred = as_numpy(pred.squeeze(0).cpu())
        acc, pix = accuracy(pred, labels)
        acc_meter.update(acc, pix)
        if args.visualize:
            visualize_result(batch_data, pred_seg, args)
    history['val']['epoch'].append(epoch)
    history['val']['loss'].append(loss_meter.average())
    history['val']['acc'].append(acc_meter.average())
    print('[Eval Summary] Epoch: {}, Loss: {}, Accurarcy: {:4.2f}%'.format(
        epoch, loss_meter.average(), acc_meter.average() * 100))
    # Plot figure
    if epoch > 0:
        print('Plotting loss figure...')
        # Log-scale loss curves for train vs. validation.
        fig = plt.figure()
        plt.plot(np.asarray(history['train']['epoch']),
                 np.log(np.asarray(history['train']['loss'])),
                 color='b', label='training')
        plt.plot(np.asarray(history['val']['epoch']),
                 np.log(np.asarray(history['val']['loss'])),
                 color='c', label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Log(loss)')
        fig.savefig('{}/loss.png'.format(args.checkpoints_dir), dpi=225)
        plt.close('all')
        # Accuracy curves.
        fig = plt.figure()
        plt.plot(history['train']['epoch'], history['train']['acc'],
                 color='b', label='training')
        plt.plot(history['val']['epoch'], history['val']['acc'],
                 color='c', label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        fig.savefig('{}/accuracy.png'.format(args.checkpoints_dir), dpi=225)
        plt.close('all')
def train(models, train_loader, interp, optimizers, criterion, history, epoch, args):
    """Train the segmentation model for one epoch.

    Adjusts the learning rate per iteration, and every `args.display_iter`
    iterations prints timing/accuracy/loss and appends a point to
    `history['train']`. Uses module-level `device`, `adjust_learning_rate`,
    and `pixel_acc`.

    (Cleanup: the original body carried many commented-out variants of the
    same statements; they were removed — runtime behavior is unchanged.)
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    # Switch to train mode
    models.train()
    # main loop
    tic = time.time()
    for i_iter, batch_data in enumerate(train_loader):
        # Global iteration index across epochs (epochs are 1-based here).
        cur_iter = i_iter + (epoch - 1) * args.epoch_iters
        # measure data loading time
        torch.cuda.synchronize()
        data_time.update(time.time() - tic)
        # forward pass
        images, labels, _ = batch_data
        images = images.to(device)
        labels = labels.to(device)
        optimizers.zero_grad()
        adjust_learning_rate(optimizers, cur_iter, args)
        pred_seg = models(images)
        # Upsample logits to label resolution before computing the loss.
        pred_seg = interp(pred_seg)
        loss = criterion(pred_seg, labels)
        # Backward / compute gradient and do SGD step
        loss.backward()
        optimizers.step()
        # Measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()
        if i_iter % args.display_iter == 0:
            acc = pixel_acc(pred_seg, labels)
            print('Epoch: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'LR: {:.6f} '
                  'Accurary: {:4.2f}, Loss: {:.6f} '.format(
                      epoch, i_iter, args.epoch_iters,
                      batch_time.average(), data_time.average(),
                      args.running_lr, acc.data.item() * 100, loss.data.item()))
            # Record a fractional-epoch sample point for the training curves.
            fractional_epoch = epoch - 1 + 1. * i_iter / args.epoch_iters
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss'].append(loss.data.item())
            history['train']['acc'].append(acc.data.item())
def train(epoch):
    """Train the paired image/pose autoencoder for one epoch.

    Optimizes image reconstruction, pose reconstruction, and a cross-modal
    latent similarity term; periodically saves a reconstruction grid.
    Uses module-level `model`, `criterion`, `optimizer`, `train_loader`,
    `opt`, `save_interval`, and `saveDir_results`.

    NOTE(review): uses the pre-0.4 PyTorch API (`Variable`,
    `total_loss.data[0]`).
    """
    model.train()
    nIters = len(train_loader)
    # bar = Bar('==>', max=nIters)
    Loss, Acc = AverageMeter(), AverageMeter()
    start_time = time.time()
    for i, (inp_img, down_img, pose) in enumerate(train_loader):
        input_img = Variable(inp_img).float().cuda()
        target_img = Variable(down_img).float().cuda()
        # Heatmaps -> 2D joint coordinates, flattened to 32 values (16 joints).
        input_pose = Variable(
            torch.FloatTensor(getPreds(pose.cpu().numpy())).view(
                -1, 32)).float().cuda()
        recon_img, recon_pose, h_img, h_pose = model(input_img, input_pose)
        ll_loss_img = criterion(recon_img, target_img)
        ll_loss_pose = criterion(recon_pose, input_pose)
        # Detached copies so each latent is pulled toward the other without
        # back-propagating through both branches at once.
        h_img_copy, h_pose_copy = h_img.detach(), h_pose.detach()
        dissim_loss = F.mse_loss(h_img, h_pose_copy) + F.mse_loss(
            h_pose, h_img_copy)
        # add kl-div loss for each ae to get vaes
        # add kl-div loss for dissimilarity
        total_loss = ll_loss_img * opt.img_recon_wt + ll_loss_pose * opt.pose_recon_wt + dissim_loss * opt.dissim_wt
        Loss.update(total_loss.data[0], inp_img.size(0))
        # Regression accuracy of the image->pose cross-modal decoding.
        img_to_pose = model.forward_i_to_p(h_img)
        Acc.update(
            Accuracy_Reg((img_to_pose.data.view(-1, 16, 2)).cpu().numpy(),
                         (input_pose.data.view(-1, 16, 2)).cpu().numpy()))
        del img_to_pose
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        curr_time = time.time()
        print(
            '{split} Epoch: [{0}][{1}/{2}]| Total: {total:f} | ETA: {eta:f} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f} )'
            .format(epoch, i, nIters, total=curr_time - start_time,
                    eta=(curr_time - start_time) * (nIters - i + 1) / (i + 1),
                    loss=Loss, Acc=Acc, split='train'))
        if i % save_interval == 0:
            # Save a side-by-side grid: originals, reconstructions, and
            # cross-modal decodings for the first <=4 samples.
            n = min(input_img.size(0), 4)
            orig_i, recon_i, orig_p, recon_p = target_img[:n].data, recon_img[:n].data, makeSkel_64(
                input_pose[:n].data, (0, 0, 255)), makeSkel_64(
                recon_pose[:n].data, (255, 0, 0))
            img_to_pose = model.forward_i_to_p(h_img[:n])
            pose_to_img = model.forward_p_to_i(h_pose[:n])
            i_to_p, p_to_i = makeSkel_64(img_to_pose.data, (0, 0, 255)), pose_to_img.data
            comparison = torch.cat(
                [orig_i, recon_i, orig_p, recon_p, i_to_p, p_to_i])
            save_image(comparison,
                       saveDir_results + 'reconstruction_' + str(epoch) + "_" + str(i) + '.png',
                       nrow=n)
            print("Saving results for epoch : {0}, progress : {1:.0f}".format(
                epoch, 100 * i / len(train_loader)))
def train(args, train_loader, model, auxiliarynet, criterion, optimizer, epoch): batch_time = AverageMeter('Time', ':6.3f') data_time = AverageMeter('Data', ':6.3f') losses = AverageMeter('Loss', ':.4e') error = AverageMeter('error', ':6.2f') progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, error, prefix="Train Epoch: [{}]".format(epoch)) # switch to train mode model.train() auxiliarynet.train() end = time.time() for batch_idx, (patch, gaze_norm_g, head_norm, rot_vec_norm) in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) patch.requires_grad = False patch = patch.to(args.device) gaze_norm_g.requires_grad = False gaze_norm_g = gaze_norm_g.to(args.device) head_norm.requires_grad = False head_norm = head_norm.to(args.device) rot_vec_norm.requires_grad = False rot_vec_norm = rot_vec_norm.to(args.device) # model = model.to(args.device) # auxiliarynet = auxiliarynet.to(args.device) gaze_pred, features = model(patch) # print(features.size()) hp_pred = auxiliarynet(features) head_norm = 10 * head_norm gaze_norm_g = 100 * gaze_norm_g loss = criterion(gaze_norm_g, head_norm, gaze_pred, hp_pred) optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() angle_error = mean_angle_error( gaze_pred.cpu().detach().numpy() / 100, gaze_norm_g.cpu().detach().numpy() / 100, rot_vec_norm.cpu().detach().numpy()) losses.update(loss.item()) error.update(angle_error) if (batch_idx + 1) % args.print_freq == 0: progress.print(batch_idx + 1) return losses.get_avg(), error.get_avg()
def train_learner(self, x_train, y_train): self.before_train(x_train, y_train) # set up loader train_dataset = dataset_transform( x_train, y_train, transform=transforms_match[self.data]) train_loader = data.DataLoader(train_dataset, batch_size=self.batch, shuffle=True, num_workers=0, drop_last=True) # set up model self.model = self.model.train() # setup tracker losses_batch = AverageMeter() acc_batch = AverageMeter() for ep in range(self.epoch): for i, batch_data in enumerate(train_loader): # batch update batch_x, batch_y = batch_data batch_x = maybe_cuda(batch_x, self.cuda) batch_y = maybe_cuda(batch_y, self.cuda) for j in range(self.mem_iters): logits = self.forward(batch_x) loss = self.criterion(logits, batch_y) if self.params.trick['kd_trick']: loss = 1 / (self.task_seen + 1) * loss + (1 - 1 / (self.task_seen + 1)) * \ self.kd_manager.get_kd_loss(logits, batch_x) if self.params.trick['kd_trick_star']: loss = 1 / ((self.task_seen + 1) ** 0.5) * loss + \ (1 - 1 / ((self.task_seen + 1) ** 0.5)) * self.kd_manager.get_kd_loss(logits, batch_x) _, pred_label = torch.max(logits, 1) correct_cnt = (pred_label == batch_y).sum().item() / batch_y.size(0) # update tracker acc_batch.update(correct_cnt, batch_y.size(0)) losses_batch.update(loss, batch_y.size(0)) # backward self.opt.zero_grad() loss.backward() if self.task_seen > 0: # sample from memory of previous tasks mem_x, mem_y = self.buffer.retrieve() if mem_x.size(0) > 0: params = [ p for p in self.model.parameters() if p.requires_grad ] # gradient computed using current batch grad = [p.grad.clone() for p in params] mem_x = maybe_cuda(mem_x, self.cuda) mem_y = maybe_cuda(mem_y, self.cuda) mem_logits = self.forward(mem_x) loss_mem = self.criterion(mem_logits, mem_y) self.opt.zero_grad() loss_mem.backward() # gradient computed using memory samples grad_ref = [p.grad.clone() for p in params] # inner product of grad and grad_ref prod = sum([ torch.sum(g * g_r) for g, g_r in zip(grad, grad_ref) ]) if prod < 0: prod_ref = 
sum( [torch.sum(g_r**2) for g_r in grad_ref]) # do projection grad = [ g - prod / prod_ref * g_r for g, g_r in zip(grad, grad_ref) ] # replace params' grad for g, p in zip(grad, params): p.grad.data.copy_(g) self.opt.step() # update mem self.buffer.update(batch_x, batch_y) if i % 100 == 1 and self.verbose: print('==>>> it: {}, avg. loss: {:.6f}, ' 'running train acc: {:.3f}'.format( i, losses_batch.avg(), acc_batch.avg())) self.after_train()
def evaluate(): model = ACNet_models_V1.ACNet(num_class=5, pretrained=False) load_ckpt(model, None, None, args.last_ckpt, device) model.eval() model.to(device) val_data = ACNet_data.FreiburgForest( transform=torchvision.transforms.Compose([ ACNet_data.ScaleNorm(), ACNet_data.ToTensor(), ACNet_data.Normalize() ]), data_dirs=[args.test_dir], modal1_name=args.modal1, modal2_name=args.modal2, ) val_loader = DataLoader(val_data, batch_size=1, shuffle=False, num_workers=1, pin_memory=True) acc_meter = AverageMeter() intersection_meter = AverageMeter() union_meter = AverageMeter() a_meter = AverageMeter() b_meter = AverageMeter() with torch.no_grad(): for batch_idx, sample in enumerate(val_loader): modal1 = sample['modal1'].to(device) modal2 = sample['modal2'].to(device) label = sample['label'].numpy() basename = sample['basename'][0] with torch.no_grad(): pred = model(modal1, modal2) output = torch.argmax(pred, 1) + 1 output = output.squeeze(0).cpu().numpy() acc, pix = accuracy(output, label) intersection, union = intersectionAndUnion(output, label, args.num_class) acc_meter.update(acc, pix) a_m, b_m = macc(output, label, args.num_class) intersection_meter.update(intersection) union_meter.update(union) a_meter.update(a_m) b_meter.update(b_m) print('[{}] iter {}, accuracy: {}' .format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), batch_idx, acc)) if args.visualize: visualize_result(modal1, modal2, label, output, batch_idx, args) if args.save_predictions: colored_output = utils.color_label_eval(output).astype(np.uint8) imageio.imwrite(f'{args.output_dir}/{basename}_pred.png', colored_output.transpose([1, 2, 0])) iou = intersection_meter.sum / (union_meter.sum + 1e-10) for i, _iou in enumerate(iou): print('class [{}], IoU: {}'.format(i, _iou)) mAcc = (a_meter.average() / (b_meter.average() + 1e-10)) print(mAcc.mean()) print('[Eval Summary]:') print('Mean IoU: {:.4}, Accuracy: {:.2f}%' .format(iou.mean(), acc_meter.average() * 100))
def train(genertor, discriminator, iterator, interp, optimizer, optimizer_D,
          criterion, criterion_bce, history, epoch, args):
    """One epoch of adversarial semantic-segmentation training.

    Alternates between (a) updating the generator with segmentation +
    adversarial losses while the discriminator is frozen, and (b) updating
    the discriminator on detached generator outputs. `interp` upsamples
    logits to label resolution. Uses legacy Variable/volatile and
    `.data...[0]` APIs (pre-0.4 PyTorch).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    # laber for adversarial training
    S1_label = 0
    S2_label = 1
    genertor.train()
    discriminator.train()
    # main loop
    tic = time.time()
    for i_iter in range(args.epoch_iters):
        loss_seg_value_S1 = 0
        loss_seg_value_S2 = 0
        loss_seg_value_La = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)
        # freeze D while training the generator
        for param in discriminator.parameters():
            param.requires_grad = False
        _, batch_data = next(iterator)  # use enumerate()
        data_time.update(time.time() - tic)
        # batch_data = next(trainloader_iter) # use iter()
        images, labels, infos = batch_data
        # images, labels, _ = batch_data
        # print(images, labels)
        # feed input data
        input_img = Variable(images, volatile=False)  # train:False , val: True
        label_seg = Variable(labels.long(), volatile=False)  # long() ???
        input_img = input_img.cuda()
        label_seg = label_seg.cuda()
        #print(label_seg)
        #print('input_img_size: {}, label_seg_size: {}'.format(input_img.size(), label_seg.size()))
        pred_S2, _, pred_S1 = genertor(input_img)
        pred_S1 = interp(pred_S1)  # --> [ B x 150 x 321 x 321 ]
        pred_S2 = interp(pred_S2)
        #print(pred_G2.size())
        #print(pred_G2.type())
        # input size (torch.Size([4, 150, 321, 321])) Target size (torch.Size([4, 321, 321])
        loss_seg_S1 = criterion(pred_S1, label_seg)
        loss_seg_S2 = criterion(pred_S2, label_seg)
        # produce mask
        #pred_label = pred_S2.data.cpu().numpy().argmax(axis=1)
        pred_label = pred_S1.data.cpu().numpy().argmax(axis=1)
        pred_label = torch.from_numpy(pred_label)
        pred_label = Variable(pred_label.long()).cuda()
        #loss_seg_La = criterion(pred_S2, pred_label) # / 1.65
        loss_seg_La = criterion(pred_S2, label_seg)  # / 1.65
        D_out_S1 = interp(discriminator(
            F.softmax(pred_S1)))  # --> [B x 1 x 321 x 321]
        D_out_S2 = interp(discriminator(F.softmax(pred_S2)))
        # adversarial term: push D's output on S2 toward the S1 label
        #loss_adv_pred = criterion_bce(D_out_S1, Variable(torch.FloatTensor(D_out_S1.data.size()).fill_(S2_label)).cuda())
        loss_adv_pred = criterion_bce(
            D_out_S2,
            Variable(torch.FloatTensor(
                D_out_S2.data.size()).fill_(S1_label)).cuda())
        loss_weakly = args.lambda_seg_La * loss_seg_La
        #loss_weakly = args.lambda_seg_La * (1 - (loss_seg_La / loss_seg_S2))**2
        #loss = args.lambda_seg_S1 * loss_seg_S1
        loss = args.lambda_seg_S1 * loss_seg_S1 + args.lambda_adv_pred * loss_adv_pred
        #loss = args.lambda_seg_S1 * loss_seg_S1 + args.lambda_adv_pred * loss_adv_pred + args.lambda_seg_La * loss_seg_La
        #loss = args.lambda_seg_S1 * loss_seg_S1 + args.lambda_adv_pred * loss_adv_pred + args.lambda_seg_La * (1 - (loss_seg_La / loss_seg_S2))**2
        # proper normalization
        #loss_1.backward() # detach()
        # retain_graph: the graph is reused by the second backward below
        loss_weakly.backward(retain_graph=True)
        loss.backward()
        loss_seg_value_S1 += loss_seg_S1.data.cpu().numpy()[0]
        loss_seg_value_S2 += loss_seg_S2.data.cpu().numpy()[0]
        loss_seg_value_La += loss_seg_La.data.cpu().numpy()[0]
        loss_adv_pred_value += loss_adv_pred.data.cpu().numpy()[0]
        # train D
        # model_D.train()
        # optimizer_D.zero_grad()
        # bring back requires_grad
        for param in discriminator.parameters():
            param.requires_grad = True
        # train S1 — detach so D's loss does not backprop into the generator
        pred_S1 = pred_S1.detach()
        D_out_S1 = interp(discriminator(F.softmax(pred_S1)))
        loss_D = criterion_bce(
            D_out_S1,
            Variable(torch.FloatTensor(
                D_out_S1.data.size()).fill_(S1_label)).cuda())
        loss_D = loss_D / 2.0
        loss_D.backward()
        loss_D_value += loss_D.data.cpu().numpy()[0]
        # train S2
        pred_S2 = pred_S2.detach()
        D_out_S2 = interp(discriminator(F.softmax(pred_S2)))
        loss_D = criterion_bce(
            D_out_S2,
            Variable(torch.FloatTensor(
                D_out_S2.data.size()).fill_(S2_label)).cuda())
        loss_D = loss_D / 2.0
        loss_D.backward()
        loss_D_value += loss_D.data.cpu().numpy()[0]
        optimizer.step()
        optimizer_D.step()
        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()
        # calculate accuracy , mIOU, and display
        if i_iter % args.disp_iter == 0:  # can not change
            acc_pred_outputs, pix_pred_outputs = accuracy(batch_data, pred_S1)
            #print('exp = {}'.format(args.checkpoints_dir))
            print(
                'iter =[{0:d}]/[{1:d}/{2:d}], Time: {3:.2f}, Data: {4:.2f}, loss_seg_S1 = {5:.4f} loss_seg_S2 = {6:.4f} loss_seg_La = {7:.4f}, loss_adv_pred = {8:.4f}, loss_D = {9:.4f}, Accurarcy: {10:4.2f}%'
                .format(epoch, i_iter, args.epoch_iters, batch_time.average(),
                        data_time.average(), loss_seg_value_S1,
                        loss_seg_value_S2, loss_seg_value_La,
                        loss_adv_pred_value, loss_D_value,
                        acc_pred_outputs * 100))
            fractional_epoch = epoch - 1 + 1. * i_iter / args.epoch_iters
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss_pred_outputs'].append(loss_seg_S1.data[0])
            history['train']['acc_pred_outputs'].append(acc_pred_outputs)
        # checkpoint — "latest" on the very last iteration of the last epoch
        if epoch == args.num_epoches and i_iter >= args.epoch_iters - 1:
            print('taking checkpoints latest ...')
            torch.save(
                genertor.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(epoch) + 'epoch_' +
                    str(args.epoch_iters) + '_latest.pth'))
            torch.save(
                discriminator.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(epoch) + 'epoch_' +
                    str(args.epoch_iters) + '_D_latest.pth'))
            # "best" checkpoint tracked by the most recent training seg loss
            loss_seg_S1 = history['train']['loss_pred_outputs'][-1]
            if loss_seg_S1 < args.best_loss:
                args.best_loss = loss_seg_S1
                print('taking checkpoints best ...')
                torch.save(
                    genertor.state_dict(),
                    osp.join(
                        args.checkpoints_dir,
                        str(args.generatormodel) + '_' +
                        str(args.epoch_iters) + '_train_best.pth'))
                torch.save(
                    discriminator.state_dict(),
                    osp.join(
                        args.checkpoints_dir,
                        str(args.generatormodel) + '_' +
                        str(args.epoch_iters) + '_D_train_best.pth'))
def _train_epoch(self, start_time): train_loss = AverageMeter() for step, batch in enumerate(self.train_loader): self.model.train() batch = tuple(t.to(self.device) for t in batch) batch_size = batch[1].size(0) op = batch[0] inputs = { "input_ids_a": batch[1], "token_type_ids_a": batch[2], "attention_mask_a": batch[3], "input_ids_b": batch[4], "token_type_ids_b": batch[5], "attention_mask_b": batch[6], "input_ids_c": batch[7], "token_type_ids_c": batch[8], "attention_mask_c": batch[9], } if self.fts_flag: inputs.update({ "x_a": batch[10], "x_b": batch[11], "x_c": batch[12] }) # anchor, positive, negative = self.model(**inputs) outputs = self.model(**inputs) if type(outputs) not in (tuple, list): # tuple outputs = (outputs, ) loss = self.criterion(op.float(), *outputs) train_loss.update(loss.item(), batch_size) if self.gradient_accumulation_steps > 1: loss = loss / self.gradient_accumulation_steps with amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() if (step + 1) % self.gradient_accumulation_steps == 0: self.optimizer.step() self.optimizer.zero_grad() self.global_step += 1 if (step + 1) % 20 == 0: rate = self.optimizer.get_lr() now_epoch = (self.global_step * self.batch_size / len(self.train_loader.dataset)) self.logger.info( f"{rate[0]:.7f} " f"{self.global_step / 1000:5.2f} " f"{now_epoch:6.2f} | " f"{train_loss.avg:.4f} | " f'{time_to_str((timer() - start_time), "sec")} ' f"{torch.cuda.memory_allocated() // 1024 ** 2}") train_log = {"loss": train_loss.avg} return train_log
def evaluate(genertor, val_loader, interp, criterion, history, epoch, args):
    """Evaluate the segmentation generator and update/plot loss-acc history.

    Records the epoch's average validation loss/accuracy into `history` and
    re-plots the train/val loss and accuracy curves to args.checkpoints_dir.
    Uses legacy `volatile=True` Variables (pre-0.4 PyTorch).
    """
    print('Evaluating at {} epochs...'.format(epoch))
    loss_pred_outputs_meter = AverageMeter()
    acc_pred_outputs_meter = AverageMeter()
    # switch to eval mode
    genertor.eval()
    for i, batch_data in enumerate(val_loader):
        # forward pass
        #_, batch_data = next(iterator) # use enumerate()
        #data_time.update(time.time() - tic)
        # batch_data = next(trainloader_iter) # use iter()
        images, labels, infos = batch_data
        # images, labels, _ = batch_data
        # print(images, labels)
        # feed input data
        input_img = Variable(images, volatile=True)  # train:False , val: True
        label_seg = Variable(labels.long(), volatile=True)  # long() ???
        input_img = input_img.cuda()
        label_seg = label_seg.cuda()
        #print(label_seg)
        #print('input_img_size: {}, label_seg_size: {}'.format(input_img.size(), label_seg.size()))
        pred1, _, pred2 = genertor(input_img)
        pred1 = interp(pred1)  # --> [ B x 150 x 321 x 321 ]
        pred2 = interp(pred2)
        #pred1 = nn.functional.log_softmax(pred1)
        #pred2 = nn.functional.log_softmax(pred2)
        #pred_outputs = nn.functional.log_softmax(pred_outputs)
        loss_pred_outputs = criterion(pred2, label_seg)
        loss_pred_outputs_meter.update(loss_pred_outputs.data[0])
        print('[Eval] iter {}, loss_pred_outputs:{}'.format(
            i, loss_pred_outputs.data[0]))
        acc_pred_outputs, pix_pred_outputs = accuracy(batch_data, pred2)
        acc_pred_outputs_meter.update(acc_pred_outputs, pix_pred_outputs)
        if args.visualize:
            visualize_tv(batch_data, pred1, pred2, args)
    history['val']['epoch'].append(epoch)
    history['val']['loss_pred_outputs'].append(
        loss_pred_outputs_meter.average())
    history['val']['acc_pred_outputs'].append(acc_pred_outputs_meter.average())
    print('[Eval Summary] Epoch: {}, Loss: {}, Accurarcy: {:4.2f}%'.format(
        epoch, loss_pred_outputs_meter.average(),
        acc_pred_outputs_meter.average() * 100))
    # plot figure
    if epoch > 0:
        print('Plotting loss figure...')
        fig = plt.figure()
        # log-scale the losses so early large values don't flatten the curve
        plt.plot(np.asarray(history['train']['epoch']),
                 np.log(np.asarray(history['train']['loss_pred_outputs'])),
                 color='b',
                 label='training')
        plt.plot(np.asarray(history['val']['epoch']),
                 np.log(np.asarray(history['val']['loss_pred_outputs'])),
                 color='c',
                 label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Log(loss)')
        fig.savefig('{}/loss.png'.format(args.checkpoints_dir), dpi=200)
        plt.close('all')
        fig = plt.figure()
        plt.plot(history['train']['epoch'],
                 history['train']['acc_pred_outputs'],
                 color='b',
                 label='training')
        plt.plot(history['val']['epoch'],
                 history['val']['acc_pred_outputs'],
                 color='c',
                 label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        fig.savefig('{}/accuracy.png'.format(args.checkpoints_dir), dpi=200)
        plt.close('all')
# NOTE(review): the triple quote below appears to open a commented-out
# region whose closing quote lies outside this chunk — verify in full file.
"""
def val(epoch, writer_val): # define meters loss_meter, iou_meter = AverageMeter(), AverageMeter() # put model into eval mode model.eval() with torch.no_grad(): for i, sample in enumerate(tqdm(val_dataset_it)): im = sample['image'] instances = sample['instance'].squeeze() class_labels = sample['label'].squeeze() output = model(im) loss = criterion(output, instances, class_labels, **args['loss_w'], iou=True, iou_meter=iou_meter) loss = loss.mean() if args['display'] and i % args['display_it'] == 0: with torch.no_grad(): visualizer.display(im[0], 'image') predictions = cluster.cluster_with_gt( output[0], instances[0], n_sigma=args['loss_opts']['n_sigma']) visualizer.display([predictions.cpu(), instances[0].cpu()], 'pred') sigma = output[0][2].cpu() sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min()) sigma[instances[0] == 0] = 0 visualizer.display(sigma, 'sigma') seed = torch.sigmoid(output[0][3]).cpu() visualizer.display(seed, 'seed') loss_meter.update(loss.item()) if args['tensorboard']: with torch.no_grad(): color_map = draw_flow(torch.tanh(output[0][0:2])) seed = torch.sigmoid(output[0][3:11]).cpu() sigma = output[0][2].cpu() sigma = (sigma - sigma.min()) / (sigma.max() - sigma.min()) sigma[instances[0] == 0] = 0 #predictions = cluster.cluster_with_gt(output[0], instances[0], n_sigma=args['loss_opts']['n_sigma']) color_map = color_map.transpose(2, 0, 1) seed_visual = seed.unsqueeze(1) seed_show = vutils.make_grid(seed_visual, nrow=8, normalize=True, scale_each=True) writer_val.add_image('Input', im[0], epoch) writer_val.add_image('InstanceGT', instances[0].unsqueeze(0).cpu().numpy(), epoch) writer_val.add_image('ColorMap', color_map, epoch) writer_val.add_image('SeedMap', seed_show, epoch) writer_val.add_image('SigmaMap', sigma.unsqueeze(0).cpu().numpy(), epoch) #writer_val.add_image('Prediction', predictions.unsqueeze(0).cpu().numpy(), epoch) return loss_meter.avg, iou_meter.avg
def train(P,
          epoch,
          model,
          criterion,
          optimizer,
          scheduler,
          loader,
          adversary,
          logger=None):
    """One epoch of MART adversarial training with consistency regularization.

    Each sample arrives as two augmented views; `mart_loss` generates
    adversarial examples for the 2B-sized pair batch, and a Jensen-Shannon
    divergence between the two adversarial outputs is added as a
    consistency term weighted by P.lam.
    NOTE(review): no explicit optimizer.zero_grad() here — presumably
    mart_loss (which receives `optimizer`) zeroes the grads internally;
    confirm in its implementation.
    """
    if logger is None:
        log_ = print
    else:
        log_ = logger.log
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = dict()
    losses['mrt'] = AverageMeter()
    losses['con'] = AverageMeter()
    losses['adv'] = AverageMeter()
    check = time.time()
    for n, (images, labels) in enumerate(loader):
        model.train()
        count = n * P.n_gpus  # number of trained samples
        data_time.update(time.time() - check)
        check = time.time()
        labels = labels.to(device)
        batch_size = images[0].size(0)
        # concatenate both augmented views into one 2B batch
        images_aug1, images_aug2 = images[0].to(device), images[1].to(device)
        images_pair = torch.cat([images_aug1, images_aug2], dim=0)  # 2B
        loss_adv, loss_mart, outputs_adv, = mart_loss(
            model, images_pair, labels.repeat(2), optimizer,
            distance=P.distance,
            eps_iter=P.alpha,
            eps=P.epsilon,
            nb_iter=P.n_iters,
            beta=P.beta,
            clip_min=0,
            clip_max=1,
            return_adv=True
        )
        ### consistency regularization ###
        outputs_adv1, outputs_adv2 = outputs_adv.chunk(2)
        loss_con = _jensen_shannon_div(outputs_adv1, outputs_adv2, P.T)
        ### total loss ###
        loss_con *= P.lam
        loss = loss_mart + loss_adv + loss_con
        loss.backward()
        optimizer.step()
        lr = optimizer.param_groups[0]['lr']
        batch_time.update(time.time() - check)
        ### Log losses ###
        losses['mrt'].update(loss_mart.item(), batch_size)
        losses['con'].update(loss_con.item(), batch_size)
        losses['adv'].update(loss_adv.item(), batch_size)
        if count % 50 == 0:
            log_('[Epoch %3d; %3d] [Time %.3f] [Data %.3f] [LR %.5f]\n'
                 '[LossMRT %f] [LossCon %f] [LossAdv %f]' %
                 (epoch, count, batch_time.value, data_time.value, lr,
                  losses['mrt'].value, losses['con'].value,
                  losses['adv'].value))
        check = time.time()
    if P.optimizer == 'sgd':
        scheduler.step()
    log_('[DONE] [Time %.3f] [Data %.3f] [LossMRT %f] '
         '[LossCon %f] [LossAdv %f]' %
         (batch_time.average, data_time.average, losses['mrt'].average,
          losses['con'].average, losses['adv'].average))
    if logger is not None:
        logger.scalar_summary('train/loss_mart', losses['mrt'].average, epoch)
        logger.scalar_summary('train/loss_con', losses['con'].average, epoch)
        logger.scalar_summary('train/loss_adversary', losses['adv'].average,
                              epoch)
        logger.scalar_summary('train/batch_time', batch_time.average, epoch)
def update_network(self, **kwargs):
    """Run one epoch of symmetric-classifier domain-adaptation training.

    Alternately updates the classifier (task + discrimination losses) and
    the feature extractor (confusion losses, target term ramped by
    `self.lam`). The classifier output is a 2*num_classes-way head: first
    half is the "source" classifier Fs, second half the "target" Ft.
    """
    stop = False
    self.train_data['source']['iterator'] = iter(
        self.train_data['source']['loader'])
    self.train_data['target']['iterator'] = iter(
        self.train_data['target']['loader'])
    self.iters_per_epoch = len(self.train_data['target']['loader'])
    iters_counter_within_epoch = 0
    data_time = AverageMeter()
    batch_time = AverageMeter()
    classifier_loss = AverageMeter()
    feature_extractor_loss = AverageMeter()
    prec1_fs = AverageMeter()
    prec1_ft = AverageMeter()
    self.feature_extractor.train()
    self.classifier.train()
    end = time.time()
    if self.opt.TRAIN.PROCESS_COUNTER == 'epoch':
        # lam ramps 0 -> 1 over training (standard DANN-style schedule)
        self.lam = 2 / (1 + math.exp(
            -1 * 10 * self.epoch / self.opt.TRAIN.MAX_EPOCH)) - 1
        self.update_lr()
        print('value of lam is: %3f' % (self.lam))
    while not stop:
        if self.opt.TRAIN.PROCESS_COUNTER == 'iteration':
            self.lam = 2 / (1 + math.exp(
                -1 * 10 * self.iters /
                (self.opt.TRAIN.MAX_EPOCH * self.iters_per_epoch))) - 1
            print('value of lam is: %3f' % (self.lam))
            self.update_lr()
        source_data, source_gt = self.get_samples('source')
        target_data, _ = self.get_samples('target')
        source_data = to_cuda(source_data)
        source_gt = to_cuda(source_gt)
        target_data = to_cuda(target_data)
        data_time.update(time.time() - end)
        feature_source = self.feature_extractor(source_data)
        output_source = self.classifier(feature_source)
        feature_target = self.feature_extractor(target_data)
        output_target = self.classifier(feature_target)
        # duplicated class weights cover the concatenated Fs+Ft head
        weight_concate = torch.cat((self.class_weight, self.class_weight))
        loss_task_fs = self.CELossWeight(
            output_source[:, :self.num_classes], source_gt, self.class_weight)
        loss_task_ft = self.CELossWeight(
            output_source[:, self.num_classes:], source_gt, self.class_weight)
        loss_discrim_source = self.CELossWeight(output_source, source_gt,
                                                weight_concate)
        loss_discrim_target = self.TargetDiscrimLoss(output_target)
        loss_summary_classifier = loss_task_fs + loss_task_ft + loss_discrim_source + loss_discrim_target
        # the same source sample labeled as its Ft-head class index
        source_gt_for_ft_in_fst = source_gt + self.num_classes
        loss_confusion_source = 0.5 * self.CELossWeight(
            output_source, source_gt, weight_concate) + 0.5 * self.CELossWeight(
                output_source, source_gt_for_ft_in_fst, weight_concate)
        loss_confusion_target = self.ConcatenatedCELoss(output_target)
        loss_summary_feature_extractor = loss_confusion_source + self.lam * loss_confusion_target
        # classifier step first; retain_graph so the feature-extractor
        # backward below can reuse the same graph
        self.optimizer_classifier.zero_grad()
        loss_summary_classifier.backward(retain_graph=True)
        self.optimizer_classifier.step()
        self.optimizer_feature_extractor.zero_grad()
        loss_summary_feature_extractor.backward()
        self.optimizer_feature_extractor.step()
        # NOTE(review): tensors (not .item()) are stored in the meters here,
        # which keeps graph references alive — confirm intended
        classifier_loss.update(loss_summary_classifier, source_data.size()[0])
        feature_extractor_loss.update(loss_summary_feature_extractor,
                                      source_data.size()[0])
        prec1_fs.update(
            accuracy(output_source[:, :self.num_classes], source_gt),
            source_data.size()[0])
        prec1_ft.update(
            accuracy(output_source[:, self.num_classes:], source_gt),
            source_data.size()[0])
        print("  Train:epoch: %d:[%d/%d], LossCla: %3f, LossFeat: %3f, AccFs: %3f, AccFt: %3f" % \
              (self.epoch, iters_counter_within_epoch, self.iters_per_epoch, classifier_loss.avg, feature_extractor_loss.avg, prec1_fs.avg, prec1_ft.avg))
        batch_time.update(time.time() - end)
        end = time.time()
        self.iters += 1
        iters_counter_within_epoch += 1
        if iters_counter_within_epoch >= self.iters_per_epoch:
            log = open(os.path.join(self.opt.SAVE_DIR, 'log.txt'), 'a')
            log.write("\n")
            log.write("  Train:epoch: %d:[%d/%d], LossCla: %3f, LossFeat: %3f, AccFs: %3f, AccFt: %3f" % \
                      (self.epoch, iters_counter_within_epoch, self.iters_per_epoch, classifier_loss.avg, feature_extractor_loss.avg, prec1_fs.avg, prec1_ft.avg))
            log.close()
            stop = True
def main(args): # np.random.seed(args.seed) # torch.manual_seed(args.seed) # cudnn.benchmark = True train_loader, test_loader = get_data(args) model = ALE() print(model) model = nn.DataParallel(model).cuda() # Optimizer """ if hasattr(model.module, 'base'): base_param_ids = set(map(id, model.module.base.parameters())) new_params = [p for p in model.parameters() if id(p) not in base_param_ids] param_groups = [ {'params': model.module.base.parameters(), 'lr_mult': 0.1}, {'params': new_params, 'lr_mult': 1.0}] else: param_groups = model.parameters() """ param_groups = model.parameters() optimizer = torch.optim.ADAM(param_groups, lr=args.lr, weight_decay=args.weight_decay) def adjust_lr(epoch): if epoch in [80]: lr = 0.1 * args.lr print('=====> adjust lr to {}'.format(lr)) for g in optimizer.param_groups: g['lr'] = lr * g.get('lr_mult', 1) for epoch in range(0, args.epochs): adjust_lr(epoch) model.train() loss = AverageMeter() iteration = 935 * epoch # print(iteration) for i,d in enumerate(train_loader): iteration += 1 img_embeds, class_embeds, metas = d optimizer.zero_grad() comps = model(img_embeds) mse_loss = nn.L1Loss(size_average=False)(outputs, attr_targets) mse_loss = mse_loss/args.batch_size loss.update(mse_loss.data[0], img_embeds.size(0)) mse_loss.backward() optimizer.step() vis.line(X=torch.ones((1,)) * iteration, Y=torch.Tensor((loss.avg,)), win='reid softmax loss of network', update='append' if iteration > 0 else None, opts=dict(xlabel='iteration', title='Loss', legend=['Loss']) ) if (i + 1) % args.print_freq == 0: print('Epoch: [{}][{}/{}]\t Loss {:.6f} ({:.6f})\t' .format(epoch, i + 1, len(train_loader), loss.val, loss.avg)) save_checkpoint({ 'state_dict': model.module.state_dict(), 'epoch': epoch + 1, 'best_top1': 0, }, False, fpath=osp.join(args.model_dir, 'checkpoint.pth.tar')) test(test_loader, test_cls_list, test_attrs, args.model_dir)
def validate(args, val_loader, model, ce_criterion, device, epoch,
             model_writer, global_step):
    """Validate the discrete 3D-pose sequence model for one epoch.

    Batches contain mask, positional vector, input/target discretized pose
    sequences, and optional MFCC/beat audio features selected by
    `args.add_mfcc` / `args.add_beat`. Logs per-batch loss to TensorBoard.

    Returns:
        Average total validation loss over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()
    # switch to train mode
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (mask, pos_vec, pose3d_discrete_seq, pose3d_discrete_gt_seq, \
                mfcc_data, beat_data) in enumerate(val_loader):
            # BS X T X 48, BS X T X 48, BS X 1 X T, BS X 1 X T, BS X T X 48
            bs = pose3d_discrete_seq.size()[0]
            timesteps = pose3d_discrete_seq.size()[1]
            # measure data loading time
            data_time.update(time.time() - end)
            # Send to device
            pose3d_discrete_seq = pose3d_discrete_seq.to(device)
            pose3d_discrete_gt_seq = pose3d_discrete_gt_seq.to(device)
            mask = mask.to(device)
            pos_vec = pos_vec.to(device)
            # choose forward signature by which audio features are enabled
            if args.add_mfcc and args.add_beat:
                mfcc_data_input = mfcc_data.to(device)
                beat_data_input = beat_data.to(device).long()
                pred_out = model(pose3d_discrete_seq, mask, pos_vec, \
                                 mfcc_feats=mfcc_data_input,
                                 beat_feats=beat_data_input)  # BS X T X 48 X N_cls
            elif args.add_mfcc:
                mfcc_data_input = mfcc_data.to(device)
                pred_out = model(pose3d_discrete_seq, mask, pos_vec,
                                 mfcc_feats=mfcc_data_input)  # BS X T X 48 X N_cls
            elif args.add_beat:
                beat_data_input = beat_data.to(device).long()
                pred_out = model(pose3d_discrete_seq, mask, pos_vec,
                                 beat_feats=beat_data_input)  # BS X T X 48 X N_cls
            else:
                pred_out = model(pose3d_discrete_seq, mask,
                                 pos_vec)  # BS X T X 48 X N_cls
            # masked cross-entropy against the discretized ground truth
            r_loss = ce_criterion(pred_out, pose3d_discrete_gt_seq,
                                  mask.squeeze(1).unsqueeze(2))
            total_loss = r_loss
            model_writer.add_scalar("VAL Loss", np.array(total_loss.item()),
                                    global_step)
            total_losses.update(total_loss.item(), 1)
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if (i % args.print_freq == 0):
                print("\n\n")
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Total Loss {total_loss.val:.4f} ({total_loss.avg:.4f})\n'
                      .format(epoch,
                              i,
                              len(val_loader),
                              batch_time=batch_time,
                              data_time=data_time,
                              total_loss=total_losses, ))
    return total_losses.avg
def train(cfg, trainLoader, model, criterion, optimizer, epoch):
    """One training epoch for heterogeneous face recognition with MMD.

    Combines cross-entropy on real labeled samples with MMD losses that
    align NIR/VIS features for both real and synthesized (fake, label==-1)
    data. Domain convention inferred from the index selection: domain 0 is
    NIR, domain 1 is VIS — TODO confirm against the dataset.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    lossesRealCe = AverageMeter()
    lossesRealMmd = AverageMeter()
    lossesFakeMmd = AverageMeter()
    writer = SummaryWriter(logdir=os.path.join(cfg.MISC.OUTPUT_PATH, 'run',
                                               '{}'.format(cfg.CFG_NAME)))
    model.train()
    for i, data in enumerate(trainLoader):
        input = Variable(data['img'].cuda())
        label = Variable(data['label'].cuda())
        domain = Variable(data['domain'].cuda())
        # forward
        output, fc = model(input)
        # select nir and vis data (label != -1 marks real samples)
        idxReal = torch.nonzero(label.data != -1)
        idxReal = Variable(idxReal[:, 0])
        outputReal = torch.index_select(output, dim=0, index=idxReal)
        fcReal = torch.index_select(fc, dim=0, index=idxReal)
        labelReal = torch.index_select(label, 0, idxReal)
        domainReal = torch.index_select(domain, 0, idxReal)
        lossRealCe = criterion(outputReal, labelReal) * cfg.TRAIN.LAMBDA_CE
        # select real data, split by domain for the MMD alignment
        idxNirReal = torch.nonzero(domainReal.data != 1)
        idxNirReal = Variable(idxNirReal[:, 0])
        fcNirReal = torch.index_select(fcReal, 0, idxNirReal)
        idxVisReal = torch.nonzero(domainReal.data != 0)
        idxVisReal = Variable(idxVisReal[:, 0])
        fcVisReal = torch.index_select(fcReal, 0, idxVisReal)
        lossRealMmd = cfg.TRAIN.LAMBDA_MMD * mmdLoss(fcVisReal, fcNirReal)
        # select fake data (label == -1: synthesized, unlabeled)
        idxFake = torch.nonzero(label.data == -1)
        idxFake = Variable(idxFake[:, 0])
        fcFake = torch.index_select(fc, 0, idxFake)
        domainFake = torch.index_select(domain, 0, idxFake)
        # select domain of fake data
        idxNirFake = torch.nonzero(domainFake.data != 1)
        idxNirFake = Variable(idxNirFake[:, 0])
        fcNirFake = torch.index_select(fcFake, 0, idxNirFake)
        idxVisFake = torch.nonzero(domainFake.data != 0)
        idxVisFake = Variable(idxVisFake[:, 0])
        fcVisFake = torch.index_select(fcFake, 0, idxVisFake)
        lossFakeMmd = cfg.TRAIN.LAMBDA_MMD * mmdLoss(fcNirFake, fcVisFake)
        lossHFR = lossRealCe + lossRealMmd + lossFakeMmd
        optimizer.zero_grad()
        # TODO(hanyang): need to retain_graph=True??
        lossHFR.backward(retain_graph=True)
        optimizer.step()
        # measure accuracy and record loss
        lossesRealCe.update(lossRealCe.item(), outputReal.size(0))
        lossesRealMmd.update(lossRealMmd.item(), 1)
        lossesFakeMmd.update(lossFakeMmd.item(), 1)
        prec1, prec5 = accuracy(outputReal.data, labelReal.data, topk=(1, 5))
        top1.update(prec1.item(), outputReal.size(0))
        top5.update(prec5.item(), outputReal.size(0))
        # summary writer
        # writer.add_scalar('loss/cross_entropy', lossesRealCe.avg, epoch)
        # writer.add_scalar('loss/real_mmd', lossesRealMmd.avg, epoch)
        # writer.add_scalar('loss/fake_mmd', lossesFakeMmd.avg, epoch)
        if i % cfg.TRAIN.PRINT_FREQ == 0:
            # if True:
            info = '===> Epoch [{:0>3d}][{:3d}/{:3d}] | '.format(
                epoch, i, len(trainLoader))
            info += 'Loss: real ce: {:4.6f} ({:4.6f}) real mmd: {:4.6f} ({:4.6f}) fake mmd: {:4.6f} ({:4.6f}) | '.format(
                lossesRealCe.val, lossesRealCe.avg, lossesRealMmd.val,
                lossesRealMmd.avg, lossesFakeMmd.val, lossesFakeMmd.avg)
            info += 'Prec@1 : {:4.3f} ({:4.3f}) Prec@5 : {:4.3f} ({:4.3f})'.format(
                top1.val, top1.avg, top5.val, top5.avg)
            print(info)
def train(P, epoch, model, criterion, optimizer, scheduler, loader, logger=None,
          simclr_aug=None, linear=None, linear_optim=None):
    """Train the linear-evaluation head for one epoch on frozen features.

    On the first epoch, builds an SGD optimizer and MultiStepLR scheduler for
    the linear head and stashes them on ``P`` (``P.linear_optim`` /
    ``P.linear_scheduler``) so later epochs reuse them. The backbone runs in
    eval mode and its penultimate features are detached, so only the linear
    head receives gradients.
    """
    if epoch == 1:
        # define optimizer and save in P (argument)
        drop_points = [
            int(0.6 * P.epochs),
            int(0.75 * P.epochs),
            int(0.9 * P.epochs),
        ]
        head_optim = torch.optim.SGD(linear.parameters(), lr=1e-1,
                                     weight_decay=P.weight_decay)
        P.linear_optim = head_optim
        P.linear_scheduler = lr_scheduler.MultiStepLR(P.linear_optim,
                                                      gamma=0.1,
                                                      milestones=drop_points)

    log_ = print if logger is None else logger.log

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = {'cls': AverageMeter()}

    tick = time.time()
    for step, (images, labels) in enumerate(loader):
        model.eval()  # backbone frozen; only the linear head is trained
        count = step * P.n_gpus  # number of trained samples
        data_time.update(time.time() - tick)
        tick = time.time()

        ### SimCLR loss ###
        if P.dataset != 'imagenet':
            batch_size = images.size(0)
            images = hflip(images.to(device))  # 2B with hflip
        else:
            batch_size = images[0].size(0)
            images = images[0].to(device)
        labels = labels.to(device)

        images = simclr_aug(images)  # simclr augmentation
        _, outputs_aux = model(images, penultimate=True)
        features = outputs_aux['penultimate'].detach()

        # only use 0 degree samples for linear eval
        logits = linear(features[0:batch_size])
        loss_ce = criterion(logits, labels)

        ### CE loss ###
        P.linear_optim.zero_grad()
        loss_ce.backward()
        P.linear_optim.step()

        ### optimizer learning rate ###
        lr = P.linear_optim.param_groups[0]['lr']
        batch_time.update(time.time() - tick)

        ### Log losses ###
        losses['cls'].update(loss_ce.item(), batch_size)

        if count % 50 == 0:
            log_('[Epoch %3d; %3d] [Time %.3f] [Data %.3f] [LR %.5f]\n'
                 '[LossC %f]' % (epoch, count, batch_time.value,
                                 data_time.value, lr, losses['cls'].value))
        tick = time.time()

    P.linear_scheduler.step()

    log_('[DONE] [Time %.3f] [Data %.3f] [LossC %f]' %
         (batch_time.average, data_time.average, losses['cls'].average))

    if logger is not None:
        logger.scalar_summary('train/loss_cls', losses['cls'].average, epoch)
        logger.scalar_summary('train/batch_time', batch_time.average, epoch)
score = model(sample_train) # calculate loss loss = criterion(score, target_train) # zero the gradient buffer before calculating the gradients in the current step. optimizer.zero_grad() # backpropagation loss.backward() # update weights; a gradient descent step optimizer.step() ############ # step log # ############ # log loss for this batch loss_meter.update(to_scalar(loss)) # write to tensorboard; used for visualization writer.add_scalar('train/total_loss_iter', to_scalar(loss), step + 1 + dataset_L * epoch) # print the log for every "steps_per_log" batches or the final batch if (step + 1) % cfg.steps_per_log == 0 or (step + 1) % len(train_loader) == 0: log = '{}, Step {}/{} in Ep {}, {:.2f}s, loss:{:.4f}'.format( \ time_str(), step+1, dataset_L, epoch+1, time.time()-step_st, loss_meter.val) print(log) # update the learning rate scheduler.step() ############## # epoch log # ##############
def train_val(model, args):
    """Train a 6-stage CPM pose model on LSPet and periodically validate on LSP.

    Uses intermediate supervision: an MSE heatmap loss is computed for each of
    the six stages and summed. Checkpoints are saved every ``config.display``
    iterations; during validation the best (lowest average loss) model is
    tracked and saved.

    Fixes over the original Python-2-era code:
      * ``.cuda(async=True)`` -> ``.cuda(non_blocking=True)`` (``async`` is a
        reserved keyword since Python 3.7 and this was a SyntaxError);
      * ``print`` statements -> ``print()`` calls;
      * ``loss.data[0]`` -> ``loss.item()`` (indexing a 0-dim tensor was
        removed in PyTorch >= 0.5);
      * validation forward passes run under ``torch.no_grad()``.

    Args:
        model: 6-stage CPM network taking (image, center map) and returning
            six heatmap tensors.
        args: namespace with ``train_dir``, ``val_dir``, ``config`` (path to a
            config file) and ``model_name``.
    """
    train_dir = args.train_dir
    val_dir = args.val_dir
    config = Config(args.config)
    cudnn.benchmark = True
    # training data: heavy augmentation (resize / rotate / crop / flip)
    train_loader = torch.utils.data.DataLoader(
        lsp_lspet_data.LSP_Data(
            'lspet', train_dir, 8,
            Mytransforms.Compose([
                Mytransforms.RandomResized(),
                Mytransforms.RandomRotate(40),
                Mytransforms.RandomCrop(368),
                Mytransforms.RandomHorizontalFlip(),
            ])),
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.workers,
        pin_memory=True)
    # validation data: deterministic resize only
    if args.val_dir is not None and config.test_interval != 0:
        val_loader = torch.utils.data.DataLoader(
            lsp_lspet_data.LSP_Data(
                'lsp', val_dir, 8,
                Mytransforms.Compose([
                    Mytransforms.TestResized(368),
                ])),
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=config.workers,
            pin_memory=True)

    criterion = nn.MSELoss().cuda()
    params, multiple = get_parameters(model, config, False)
    optimizer = torch.optim.SGD(params, config.base_lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # one meter per stage (intermediate supervision -> 6 losses)
    losses_list = [AverageMeter() for _ in range(6)]
    end = time.time()
    iters = config.start_iters
    best_model = config.best_model
    # scale factor for the heatmap MSE: 46x46 maps, 15 joints
    heat_weight = 46 * 46 * 15 / 1.0

    while iters < config.max_iter:
        for i, (input, heatmap, centermap) in enumerate(train_loader):
            learning_rate = adjust_learning_rate(
                optimizer, iters, config.base_lr, policy=config.lr_policy,
                policy_parameter=config.policy_parameter, multiple=multiple)
            data_time.update(time.time() - end)

            heatmap = heatmap.cuda(non_blocking=True)
            centermap = centermap.cuda(non_blocking=True)
            input_var = torch.autograd.Variable(input)
            heatmap_var = torch.autograd.Variable(heatmap)
            centermap_var = torch.autograd.Variable(centermap)

            heat1, heat2, heat3, heat4, heat5, heat6 = model(
                input_var, centermap_var)
            # intermediate supervision: per-stage loss against the same target
            loss1 = criterion(heat1, heatmap_var) * heat_weight
            loss2 = criterion(heat2, heatmap_var) * heat_weight
            loss3 = criterion(heat3, heatmap_var) * heat_weight
            loss4 = criterion(heat4, heatmap_var) * heat_weight
            loss5 = criterion(heat5, heatmap_var) * heat_weight
            loss6 = criterion(heat6, heatmap_var) * heat_weight
            loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6

            losses.update(loss.item(), input.size(0))
            for cnt, l in enumerate([loss1, loss2, loss3, loss4, loss5, loss6]):
                losses_list[cnt].update(l.item(), input.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()
            iters += 1

            if iters % config.display == 0:
                print(
                    'Train Iteration: {0}\t'
                    'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                    'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n'
                    'Learning rate = {2}\n'
                    'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                        iters, config.display, learning_rate,
                        batch_time=batch_time, data_time=data_time,
                        loss=losses))
                for cnt in range(0, 6):
                    print('Loss{0} = {loss1.val:.8f} (ave = {loss1.avg:.8f})\t'.
                          format(cnt + 1, loss1=losses_list[cnt]))
                print(time.strftime(
                    '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
                    time.localtime()))
                batch_time.reset()
                data_time.reset()
                losses.reset()
                for cnt in range(6):
                    losses_list[cnt].reset()
                save_checkpoint({
                    'iter': iters,
                    'state_dict': model.state_dict(),
                }, 0, args.model_name)

            # validation: no gradients needed, so run under no_grad() to
            # avoid building autograd graphs during evaluation
            if args.val_dir is not None and config.test_interval != 0 and iters % config.test_interval == 0:
                model.eval()
                with torch.no_grad():
                    for j, (input, heatmap, centermap) in enumerate(val_loader):
                        heatmap = heatmap.cuda(non_blocking=True)
                        centermap = centermap.cuda(non_blocking=True)
                        input_var = torch.autograd.Variable(input)
                        heatmap_var = torch.autograd.Variable(heatmap)
                        centermap_var = torch.autograd.Variable(centermap)

                        heat1, heat2, heat3, heat4, heat5, heat6 = model(
                            input_var, centermap_var)
                        loss1 = criterion(heat1, heatmap_var) * heat_weight
                        loss2 = criterion(heat2, heatmap_var) * heat_weight
                        loss3 = criterion(heat3, heatmap_var) * heat_weight
                        loss4 = criterion(heat4, heatmap_var) * heat_weight
                        loss5 = criterion(heat5, heatmap_var) * heat_weight
                        loss6 = criterion(heat6, heatmap_var) * heat_weight
                        loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6

                        losses.update(loss.item(), input.size(0))
                        for cnt, l in enumerate(
                                [loss1, loss2, loss3, loss4, loss5, loss6]):
                            losses_list[cnt].update(l.item(), input.size(0))
                        batch_time.update(time.time() - end)
                        end = time.time()

                        # NOTE(review): best-model tracking compares the running
                        # average loss per batch inside the val loop -- confirm
                        # this is intended rather than a single end-of-val check
                        is_best = losses.avg < best_model
                        best_model = min(best_model, losses.avg)
                        save_checkpoint(
                            {
                                'iter': iters,
                                'state_dict': model.state_dict(),
                            }, is_best, args.model_name)

                        if j % config.display == 0:
                            print(
                                'Test Iteration: {0}\t'
                                'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                                'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n'
                                'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.
                                format(j, config.display,
                                       batch_time=batch_time,
                                       data_time=data_time, loss=losses))
                            for cnt in range(0, 6):
                                print('Loss{0} = {loss1.val:.8f} (ave = {loss1.avg:.8f})\t'
                                      .format(cnt + 1, loss1=losses_list[cnt]))
                            print(time.strftime(
                                '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
                                time.localtime()))
                            batch_time.reset()
                            losses.reset()
                            for cnt in range(6):
                                losses_list[cnt].reset()
                model.train()
def evaluate(models, val_loader, interp, criterion, args):
    """Multi-scale evaluation of a segmentation model.

    For each batch, the input is resized to every scale in ``args.scales``,
    forwarded through the model, upsampled back via ``interp``, and the
    per-scale outputs are averaged. Reports per-class IoU, mean IoU, pixel
    accuracy, and average inference time.

    Fixes over the original: the whole loop now runs under
    ``torch.no_grad()`` (evaluation previously built autograd graphs for
    every forward pass, inflating GPU memory), and the summary loop no
    longer reuses the batch-index name ``i``.

    Args:
        models: segmentation network (called directly on image batches).
        val_loader: yields (images, labels, _) batches.
        interp: upsampling module applied to each prediction.
        criterion: loss computed on the averaged prediction (reported only).
        args: namespace with num_classes, scales, gpu_id, visualize.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    time_meter = AverageMeter()
    models.eval()
    with torch.no_grad():
        for i, batch_data in enumerate(val_loader):
            # forward pass
            images, labels, _ = batch_data
            torch.cuda.synchronize()
            tic = time.perf_counter()
            pred_seg = torch.zeros(images.size(0), args.num_classes,
                                   labels.size(1), labels.size(2))
            pred_seg = pred_seg.cuda(args.gpu_id, non_blocking=True)
            for scale in args.scales:
                # bilinear resize on CPU (scipy zoom), then move to GPU
                imgs_scale = zoom(images.numpy(), (1., 1., scale, scale),
                                  order=1, prefilter=False, mode='nearest')
                input_images = torch.from_numpy(imgs_scale)
                if args.gpu_id is not None:
                    input_images = input_images.cuda(args.gpu_id,
                                                     non_blocking=True)
                pred_scale = models(input_images)
                pred_scale = interp(pred_scale)
                # average the probability over scales
                pred_seg = pred_seg + pred_scale / len(args.scales)

            seg_labels = labels.cuda(args.gpu_id, non_blocking=True)
            loss = criterion(pred_seg, seg_labels)
            loss_meter.update(loss.item())
            print('[Eval] iter {}, loss: {}'.format(i, loss.item()))

            labels = as_numpy(labels)
            _, pred = torch.max(pred_seg, dim=1)
            pred = as_numpy(pred.squeeze(0).cpu())

            # calculate accuracy
            acc, pix = accuracy(pred, labels)
            intersection, union = intersectionAndUnion(pred, labels,
                                                       args.num_classes)
            acc_meter.update(acc, pix)
            intersection_meter.update(intersection)
            union_meter.update(union)
            torch.cuda.synchronize()
            time_meter.update(time.perf_counter() - tic)

            if args.visualize:
                visualize_result(batch_data, pred_seg, args)

    # summary
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for cls_idx, _iou in enumerate(iou):
        print('class [ {} ], IoU: {:.4f}'.format(cls_idx, _iou))

    print('[Eval Summary]:')
    print(
        'loss: {:.6f}, Mean IoU: {:.2f}%, Accuracy: {:.2f}%, Inference Time: {:.4f}s'
        .format(loss_meter.average(), iou.mean() * 100,
                acc_meter.average() * 100, time_meter.average()))