def validate(args, val_loader, model, criterion, epoch):
    """Run evaluation."""
    top1 = utils.AverageMeter()

    # switch to evaluate mode and attach the FLOPs counter
    model = flopscounter.add_flops_counting_methods(model)
    model.eval().start_flops_count()
    model.reset_flops_count()

    num_step = len(val_loader)
    with torch.no_grad():
        for input, target in tqdm.tqdm(val_loader, total=num_step, ascii=True, mininterval=5):
            input = input.to(device=device, non_blocking=True)
            target = target.to(device=device, non_blocking=True)

            # compute output
            meta = {
                'masks': [],
                'device': device,
                'gumbel_temp': 1.0,
                'gumbel_noise': False,
                'epoch': epoch
            }
            output, meta = model(input, meta)
            output = output.float()

            # measure accuracy and record loss
            prec1 = utils.accuracy(output.data, target)[0]
            top1.update(prec1.item(), input.size(0))

            if args.plot_ponder:
                viz.plot_image(input)
                viz.plot_ponder_cost(meta['masks'])
                viz.plot_masks(meta['masks'])
                plt.show()

    print(f'* Epoch {epoch} - Prec@1 {top1.avg:.3f}')
    print(f'* FLOPS (multiply-accumulates, MACs) per image: '
          f'{model.compute_average_flops_cost()[0]/1e6:.6f} MMac')
    model.stop_flops_count()
    return top1.avg
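# All of the loops in this file track scalar statistics with utils.AverageMeter.
# Below is a minimal sketch of the interface they assume (name/format arguments,
# update(val, n), and .val/.avg attributes); the actual utils modules in each
# project may differ slightly (one loop further down calls .average() instead of .avg).
class AverageMeter:
    """Tracks the most recent value and the running average of a scalar metric."""

    def __init__(self, name='meter', fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)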
def train(net, epoch, criterion, optimizer, trainloader, args):
    loss_meter = utils.AverageMeter()
    net.train()
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        inputs = inputs.cuda()
        labels = labels.cuda()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        loss_meter.update(loss.item())
        optimizer.step()
        # running_loss += loss.item()
        if i % 1 == 0 and i > 0:  # logs every minibatch after the first
            print('[Epoch %02d, Minibatch %05d] Loss: %.5f' %
                  (epoch, i, loss_meter.average()))
def evaluate_accuracy(self, data_iterator, net):
    """
    Compute top-1 accuracy and mean loss over a data iterator.
    :param data_iterator:
    :param net:
    :return: (top-1 accuracy, average loss)
    """
    loss = utils.AverageMeter()
    acc = mx.metric.Accuracy()
    for idx, (d, l) in enumerate(data_iterator):
        data = d.as_in_context(self.ctx[0])
        label = l.as_in_context(self.ctx[0])
        output = net(data)
        _loss = self.get_loss(output, label)
        curr_loss = nd.mean(_loss).asscalar()
        loss.update(curr_loss, data.shape[0])
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
        utils.view_bar(idx + 1, len(data_iterator))  # progress bar
    return acc.get()[1], loss.avg
def extract(self):
    batch_time = utils.AverageMeter()
    self.model.eval()
    end = time.time()
    for batch_idx, (imgs, target, img_files, class_ids) in tqdm.tqdm(
            enumerate(self.val_loader), total=len(self.val_loader),
            desc='Extract', ncols=80, leave=False):
        gc.collect()
        if self.cuda:
            imgs = imgs.cuda()
        # Variable(volatile=True) is the legacy (pre-0.4) way to disable autograd;
        # on current PyTorch this would be a torch.no_grad() block instead.
        imgs = Variable(imgs, volatile=True)
        output = self.model(imgs)  # N C H W, e.g. torch.Size([1, 1, 401, 600])
        if self.flatten_feature:
            output = output.view(output.size(0), -1)
        output = output.data.cpu().numpy()
        assert output.shape[0] == len(img_files)
        for i, img_file in enumerate(img_files):
            base_name = os.path.splitext(img_file)[0]
            feature_file = os.path.join(self.feature_dir, base_name + ".npy")
            utils.create_dir(os.path.dirname(feature_file))
            np.save(feature_file, output[i])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % self.print_freq == 0:
            log_str = 'Extract: [{0}/{1}]\tTime: {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                batch_idx, len(self.val_loader), batch_time=batch_time)
            print(log_str)
            self.print_log(log_str)
def train(self):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    D1 = utils.AverageMeter()
    EPE = utils.AverageMeter()

    # switch to train mode
    self.model.train()
    time_end = time.time()
    transform = myTransforms.Stereo_color(same_group=True)
    nedge = 64 if self.lossfun.flag_mask else 0

    for i, (batch, filenames) in enumerate(self.dataloader_train):
        assert batch.shape[2] >= 6
        if self.use_cuda:
            batch = batch.cuda()
        bn, c, h, w = batch.shape
        assert h > 2 * nedge and w > 2 * nedge
        batch1 = self.flip_lr_tensor(batch)
        tmp = batch[:, :6, nedge:h - nedge, nedge:w - nedge]
        batch_aug = torch.zeros(tmp.shape).type_as(tmp)
        batch_aug.copy_(tmp)
        batch_aug = myTransforms.Stereo_color_batch(batch_aug, transform)
        batch1_aug = self.flip_lr_tensor(batch_aug)
        imL_pre = Variable(batch_aug[:, :3], volatile=False, requires_grad=False)
        imR_pre = Variable(batch_aug[:, 3:6], volatile=False, requires_grad=False)
        imL1_pre = Variable(batch1_aug[:, 3:6], volatile=False, requires_grad=False)
        imR1_pre = Variable(batch1_aug[:, :3], volatile=False, requires_grad=False)

        # measure data loading time
        data_time.update(time.time() - time_end)

        # compute output
        scale_dispLs, dispLs = self.model(imL_pre, imR_pre)
        scale_dispL1s, dispL1s = self.model(imL1_pre, imR1_pre)

        # compute loss
        imL = Variable(batch[:, :3, nedge:h - nedge, nedge:w - nedge], volatile=False, requires_grad=False)
        imR_src = Variable(batch[:, 3:6], volatile=False, requires_grad=False)
        imL1 = Variable(batch1[:, 3:6, nedge:h - nedge, nedge:w - nedge], volatile=False, requires_grad=False)
        imR1_src = Variable(batch1[:, :3], volatile=False, requires_grad=False)
        argst = {
            "imR_src": imR_src, "imL": imL, "dispLs": dispLs,
            "scale_dispLs": scale_dispLs, "LeftTop": [nedge, nedge],
            "imR1_src": imR1_src, "imL1": imL1, "dispL1s": dispL1s,
            "scale_dispL1s": scale_dispL1s, "LeftTop1": [nedge, nedge],
        }
        loss = self.lossfun(argst)
        losses.update(loss.data[0], imL.size(0))  # pre-0.4 style; loss.item() on current PyTorch

        # if(i < 5):
        #     # visualize images
        #     import matplotlib.pyplot as plt
        #     row, col = 4, 4
        #     plt.subplot(row, col, 1); plt.imshow(imL[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 2); plt.imshow(imR_src[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 3); plt.imshow(imL1[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 4); plt.imshow(imR1_src[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 5); plt.imshow(imL_pre[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 6); plt.imshow(imR_pre[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 7); plt.imshow(imL1_pre[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 8); plt.imshow(imR1_pre[0].data.cpu().numpy().transpose(1, 2, 0))
        #     for i in range(len(dispLs)):
        #         plt.subplot(row, col, 9+i); plt.imshow(dispLs[i][0, 0].data.cpu().numpy())
        #     plt.show()

        # compute gradient and do SGD step
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # measure accuracy
        if batch.shape[1] >= 7:
            dispL = batch[:, 6:7, nedge:h - nedge, nedge:w - nedge]
            d1, epe = self.accuracy(dispLs[0].data, dispL)
        else:
            d1, epe = -1, -1
        D1.update(d1, imL.size(0))
        EPE.update(epe, imL.size(0))

        # measure elapsed time
        batch_time.update(time.time() - time_end)
        time_end = time.time()

        # log every print_freq steps
        if i % self.args.print_freq == 0:  # default=20
            print('Train: [{0}][{1}/{2}] | '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) | '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) | '
                  'Loss {loss.val:.4f} ({loss.avg:.4f}) | '
                  'D1 {D1.val:.3f} ({D1.avg:.3f}) | '
                  'EPE {EPE.val:.3f} ({EPE.avg:.3f})'.format(
                      self.epoch, i, len(self.dataloader_train),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, D1=D1, EPE=EPE))

    msg = 'mean train loss: %.3f | mean D1: %.3f | mean EPE: %.3f' % (
        losses.avg, D1.avg, EPE.avg)
    logging.info(msg)
    return losses.avg, EPE.avg, D1.avg
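# The stereo trainer above reports D1 and EPE through self.accuracy, which is not
# shown here. As a reference, a sketch of the usual KITTI-style disparity metrics
# (EPE = mean absolute disparity error over valid pixels; D1 = percentage of valid
# pixels whose error exceeds both 3 px and 5% of the ground truth). The mask
# convention and max_disp value are assumptions.
import torch

def disparity_metrics(disp_pred, disp_gt, max_disp=192):
    valid = (disp_gt > 0) & (disp_gt < max_disp)  # mask out invalid ground truth
    if valid.sum() == 0:
        return -1.0, -1.0
    err = (disp_pred - disp_gt).abs()[valid]
    epe = err.mean().item()
    d1 = ((err > 3.0) & (err > 0.05 * disp_gt[valid])).float().mean().item() * 100.0
    return d1, epe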
def train(args, model, optimizer, criterion, dataloader_train, dataloader_val, writer, k_fold):
    best_pred, best_acc, best_jac, best_sen, best_spe = 0.0, 0.0, 0.0, 0.0, 0.0
    best_epoch = 0
    step = 0
    train_loss = u.AverageMeter()
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    with open("./logs/%s_%s.txt" % (args.net_work, args.net_index), "a") as f:
        print(current_time, file=f)

    for epoch in range(args.num_epochs):
        train_progressor = pb.Train_ProgressBar(mode='train', fold=k_fold, epoch=epoch,
                                                total_epoch=args.num_epochs,
                                                model_name=args.net_work,
                                                total=len(dataloader_train) * args.batch_size)
        lr = u.adjust_learning_rate(args, optimizer, epoch)
        model.train()
        for i, (data, label) in enumerate(dataloader_train):
            train_progressor.current = i * args.batch_size
            if torch.cuda.is_available() and args.use_gpu:
                data = data.cuda()
                label = label.cuda()
            main_out = model(data)

            # get weight_map (note: computed per class but not passed to the loss below)
            weight_map = torch.zeros(args.num_classes).cuda()
            for t in range(args.num_classes):
                weight_map[t] = 1 / (torch.sum((label == t).float()) + 1.0)

            loss_aux = F.binary_cross_entropy_with_logits(main_out, label, weight=None)
            loss_main = criterion[1](main_out, label)
            loss = loss_main + loss_aux
            train_loss.update(loss.item(), data.size(0))
            train_progressor.current_loss = train_loss.avg
            train_progressor.current_lr = lr

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_progressor()
            step += 1
            if step % 10 == 0:
                writer.add_scalar('Train/loss_step_{}'.format(int(k_fold)), loss, step)

        train_progressor.done()
        writer.add_scalar('Train/loss_epoch_{}'.format(int(k_fold)), float(train_loss.avg), epoch)

        Dice, Acc, jaccard, Sensitivity, Specificity = val(args, model, dataloader_val, k_fold, epoch)
        writer.add_scalar('Valid/Dice_val_{}'.format(int(k_fold)), Dice, epoch)
        writer.add_scalar('Valid/Acc_val_{}'.format(int(k_fold)), Acc, epoch)
        writer.add_scalar('Valid/Jac_val_{}'.format(int(k_fold)), jaccard, epoch)
        writer.add_scalar('Valid/Sen_val_{}'.format(int(k_fold)), Sensitivity, epoch)
        writer.add_scalar('Valid/Spe_val_{}'.format(int(k_fold)), Specificity, epoch)

        is_best = Dice > best_pred
        if is_best:
            best_pred = max(best_pred, Dice)
            best_jac = max(best_jac, jaccard)
            best_acc = max(best_acc, Acc)
            best_sen = max(best_sen, Sensitivity)
            best_spe = max(best_spe, Specificity)
            best_epoch = epoch + 1
        checkpoint_dir = os.path.join(args.save_model_path, str(k_fold))
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        checkpoint_latest_name = os.path.join(checkpoint_dir, 'checkpoint_latest.path.tar')
        u.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_dice': best_pred
            },
            best_pred, epoch, is_best, checkpoint_dir,
            filename=checkpoint_latest_name)

    # record all metrics from the best-segmentation epoch of this fold
    best_indicator_message = "f{} best pred in Epoch:{}\nDice={} Accuracy={} jaccard={} Sensitivity={} Specificity={}".format(
        k_fold, best_epoch, best_pred, best_acc, best_jac, best_sen, best_spe)
    with open("./logs/%s_%s_best_indicator.txt" % (args.net_work, args.net_index), mode='a') as f:
        print(best_indicator_message, file=f)
def train_epoch(epoch, train_loader, model, criterion, optimizer, use_cuda=True):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    data_time = utils.AverageMeter('Data', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.4e')
    top1 = utils.AverageMeter('Acc@1', ':6.2f')
    top5 = utils.AverageMeter('Acc@5', ':6.2f')
    progress = utils.ProgressMeter(len(train_loader), batch_time, data_time, top1, top5,
                                   losses, prefix="Epoch: [{}]".format(epoch + 1))
    print_freq = len(train_loader) // 4 + 1

    all_preds = []
    all_labels = []
    model.train()
    end = time.time()
    for i, (paths, inputs, labels) in enumerate(train_loader):
        if use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        data_time.update(time.time() - end)

        # forward + backward + optimize
        if type(model).__name__ == 'Inception3' and model.aux_logits:
            outputs, aux_outputs = model(inputs)
            loss_aux = criterion(aux_outputs, labels)
            loss_final = criterion(outputs, labels)
            loss = loss_final + 0.4 * loss_aux
        else:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        acc1, acc5 = utils.accuracy(outputs, labels, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1[0], inputs.size(0))
        top5.update(acc5[0], inputs.size(0))

        # for confusion matrix calculation
        _, preds = outputs.topk(1, 1, True, True)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        # zero the parameter gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print statistics
        if i % print_freq == 0 or i + 1 == len(train_loader):
            progress.print(i + 1)

    print(confusion_matrix(all_labels, all_preds))
    return top1.avg, top5.avg
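# Several loops in this file call utils.accuracy(output, target, topk=(1, 5)) and
# then index the result as acc1[0]. A sketch of the conventional top-k accuracy
# helper that matches this calling convention (returns percentages as 1-element
# tensors); the per-project utils.accuracy may differ in details.
import torch

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res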
def run_train_epoch(model, optimizer, criterion, train_dataloader, epoch, args): batch_time = utils.AverageMeter('Time', ':6.3f') losses = utils.AverageMeter('Loss', ':.4e') grad_norm = utils.AverageMeter('grad_norm', ':.4e') progress = utils.ProgressMeter(len(train_dataloader), batch_time, losses, grad_norm, prefix="Epoch: [{}]".format(epoch)) end = time.time() # trainloader is an iterator. This line extract one minibatch at one time for i, data in enumerate(train_dataloader, 0): feat = data["x"] label = data["y"] num_frs = data["num_frs"] utt_ids = data["utt_ids"] x = feat.to(th.float32) y = label.squeeze(2).long() if th.cuda.is_available(): x = x.cuda() y = y.cuda() x = x.transpose(0, 1) key_padding_mask = th.ones((x.size(1), x.size(0))) for utt in range(len(num_frs)): key_padding_mask[utt, :num_frs[utt]] = 0 src_mask = None if (args.look_ahead > -1): src_mask = th.tril(th.ones(x.size(0), x.size(0)), diagonal=args.look_ahead) src_mask = src_mask.float().masked_fill(src_mask == 0, float('-inf')).masked_fill( src_mask == 1, float(0.0)) src_mask = src_mask.cuda() key_padding_mask = key_padding_mask.bool().cuda() prediction = model(x, src_mask, key_padding_mask) prediction = prediction.transpose(0, 1).contiguous() loss = criterion(prediction.view(-1, prediction.size(2)), y.view(-1)) optimizer.zero_grad() loss.backward() # Gradient Clipping norm = nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) #update lr step = len(train_dataloader) * epoch + i + 1 lr = utils.noam_decay(step, args.warmup_step, args.lr) for param_group in optimizer.param_groups: param_group['lr'] = lr optimizer.step() grad_norm.update(norm) # update loss losses.update(loss.item(), x.size(1)) # measure elapsed time batch_time.update(time.time() - end) if i % args.print_freq == 0: # if not args.hvd or hvd.rank() == 0: progress.print(i)
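# The transformer ASR loop above (and the chain-training loop further down) sets the
# learning rate with utils.noam_decay(step, warmup_step, lr). A sketch of the usual
# Noam schedule, normalized so that base_lr is the peak rate reached at the end of
# warmup; the actual utils.noam_decay may scale differently.
def noam_decay(step, warmup_steps, base_lr):
    """Linear warmup followed by inverse-square-root decay (Vaswani et al., 2017)."""
    return base_lr * (warmup_steps ** 0.5) * min(step ** -0.5, step * warmup_steps ** -1.5)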
def train_model(trainloader, testloader, net, device): if torch.cuda.device_count() > 1: # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs print("Activate multi GPU support.") net = nn.DataParallel(net) net.to(device) # define the loss function criterion = (nn.CrossEntropyLoss().cuda() if torch.cuda.is_available() else nn.CrossEntropyLoss()) # Scale the lr linearly with the batch size. # Should be 0.1 when batch_size=128 initial_lr = 0.1 * batch_size / 128 # initialize the optimizer optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=0.9, weight_decay=_WEIGHT_DECAY) # multiply the lr by 0.1 at 100, 150, and 200 epochs div = num_epoch // 4 lr_decay_milestones = [div * 2, div * 3] scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=lr_decay_milestones, gamma=0.1, last_epoch=_LAST_EPOCH) for epoch in range(num_epoch): # loop over the dataset multiple times # set printing functions batch_time = util.AverageMeter('Time/batch', ':.3f') losses = util.AverageMeter('Loss', ':6.2f') top1 = util.AverageMeter('Acc', ':6.2f') progress = util.ProgressMeter(len(trainloader), [losses, top1, batch_time], prefix="Epoch: [{}]".format(epoch + 1)) # switch the model to the training mode net.train() print('current learning rate = {}'.format( optimizer.param_groups[0]['lr'])) # each epoch end = time.time() for i, data in enumerate(trainloader, 0): # get the inputs; data is a list of [inputs, labels] inputs, labels = data[0].to(device), data[1].to(device) # zero the parameter gradients optimizer.zero_grad() # forward + backward + optimize outputs = net(inputs) loss = criterion(outputs, labels) for name, param in net.named_parameters(): if 'threshold' in name: loss += args.sigma * torch.norm(param - args.gtarget) loss.backward() optimizer.step() # measure accuracy and record loss _, batch_predicted = torch.max(outputs.data, 1) batch_accu = 100.0 * (batch_predicted == labels).sum().item() / labels.size(0) losses.update(loss.item(), labels.size(0)) top1.update(batch_accu, labels.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % 50 == 49: # print statistics every 100 mini-batches each epoch progress.display(i) # i = batch id in the epoch # update the learning rate scheduler.step() # print test accuracy every few epochs if epoch % 10 == 9: print('epoch {}'.format(epoch + 1)) test_accu(testloader, net, device) # save the model if required if args.save: print("Saving the trained model.") util.save_models(net.state_dict(), save_folder, suffix=_ARCH) print('Finished Training')
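# train_model above scales the base learning rate linearly with the batch size
# (0.1 at batch_size=128), i.e. the linear-scaling rule. A quick check of the
# arithmetic for a few common batch sizes:
for bs in (64, 128, 256, 512):
    print(bs, 0.1 * bs / 128)  # 0.05, 0.1, 0.2, 0.4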
def train(opt, logging):
    ## Data Prepare ##
    if opt.main_proc:
        logging.info("Building dataset")
    train_dataset = DeepSpeakerDataset(opt, os.path.join(opt.dataroot, 'dev'))
    train_loader = DeepSpeakerDataLoader(train_dataset, batch_size=1, num_workers=opt.num_workers,
                                         shuffle=True, pin_memory=True)
    val_dataset = DeepSpeakerTestDataset(opt, os.path.join(opt.dataroot, 'test'))
    val_loader = DeepSpeakerTestDataLoader(val_dataset, batch_size=1, num_workers=opt.num_workers,
                                           shuffle=False, pin_memory=True)
    opt.in_size = train_dataset.in_size
    opt.out_size = train_dataset.class_nums
    print('opt.in_size {} opt.out_size {}'.format(opt.in_size, opt.out_size))
    if opt.main_proc:
        logging.info("Building dataset Succeeded")

    ## Building Model ##
    if opt.main_proc:
        logging.info("Building Model")
    opt.model_type = opt.model_type_1
    model_1 = model_select(opt, seq_training=False)  # rnn ge2e
    opt.model_type = opt.model_type_2
    model_2 = model_select(opt, seq_training=False)  # cnn class
    embedding_size = opt.embedding_size
    opt.embedding_size = 2 * embedding_size
    margin = margin_select(opt)
    opt.embedding_size = embedding_size

    if opt.resume_1:
        model_1, opt.total_iters = load(model_1, opt.resume_1, 'state_dict')
    if opt.resume_2:
        model_2, opt.total_iters = load(model_2, opt.resume_2, 'state_dict')
        margin, opt.total_iters = load(margin, opt.resume_2, 'margin_state_dict')
    if opt.resume:
        model_1, opt.total_iters = load(model_1, opt.resume, 'state_dict_1')
        model_2, opt.total_iters = load(model_2, opt.resume, 'state_dict_2')
        margin, opt.total_iters = load(margin, opt.resume, 'margin_state_dict')

    # define optimizers for the different parameter groups
    criterion = torch.nn.CrossEntropyLoss().to(opt.device)
    if opt.optim_type == 'sgd':
        optimizer = optim.SGD([
            {'params': model_1.parameters(), 'weight_decay': 5e-4},
            {'params': model_2.parameters(), 'weight_decay': 5e-4},
            {'params': margin.parameters(), 'weight_decay': 5e-4},
        ], lr=opt.lr, momentum=0.9, nesterov=True)
    elif opt.optim_type == 'adam':
        optimizer = optim.Adam([
            {'params': model_1.parameters(), 'weight_decay': 5e-4},
            {'params': model_2.parameters(), 'weight_decay': 5e-4},
            {'params': margin.parameters(), 'weight_decay': 5e-4},
        ], lr=opt.lr, betas=(opt.beta1, 0.999))
    scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=opt.lr_reduce_step,
                                    gamma=opt.lr_reduce_factor, last_epoch=-1)

    model_1.to(opt.device)
    model_2.to(opt.device)
    margin.to(opt.device)
    if opt.distributed:
        model_1 = DistributedDataParallel(model_1, device_ids=[opt.local_rank], output_device=opt.local_rank)
        model_2 = DistributedDataParallel(model_2, device_ids=[opt.local_rank], output_device=opt.local_rank)
        margin = DistributedDataParallel(margin, device_ids=[opt.local_rank], output_device=opt.local_rank)
    if opt.main_proc:
        print(model_1)
        print(model_2)
        print(margin)
        logging.info("Building Model Succeeded")

    best_perform_acc = 1.0
    losses = utils.AverageMeter()
    class_losses = utils.AverageMeter()
    embedding_losses = utils.AverageMeter()
    penalty_losses = utils.AverageMeter()

    # Initial performance
    if opt.main_proc:
        EER = union_evaluate(opt, model_1, model_2, val_loader, logging)
        best_perform_acc = EER
        print('>>Start performance: EER = {}<<'.format(best_perform_acc))

    save_model = model_1
    if isinstance(model_1, DistributedDataParallel):
        save_model = model_1.module

    # Start Training
    total_iters = opt.total_iters
    for epoch in range(1, opt.total_epoch + 1):
        while True:
            model_1.train()
            model_2.train()
            margin.train()
            for i, (data) in enumerate(train_loader, start=0):
                if i == len(train_loader):
                    break
                optimizer.zero_grad()

                # perform forward and obtain the loss
                feature_input, spk_ids = data
                feature_input = feature_input.to(opt.device)
                label = spk_ids.to(opt.device).squeeze(0)
                output_1, attn_1, w_1, b_1 = model_1(feature_input)
                output_2, attn_2, w_2, b_2 = model_2(feature_input)
                margin_input = torch.cat((output_1, output_2), dim=1)
                margin_output = margin(margin_input, label)

                output_1 = save_model.normalize(output_1)
                sim_matrix_out = save_model.similarity(output_1, w_1, b_1)
                embedding_loss = opt.embedding_loss_lamda / (opt.speaker_num * opt.utter_num) * save_model.loss_cal(sim_matrix_out)
                if opt.att_type == 'multi_attention' and attn_1 is not None:
                    penalty_loss = opt.penalty_loss_lamda * save_model.penalty_loss_cal(attn_1)
                else:
                    penalty_loss = 0
                class_loss = opt.class_loss_lamda * criterion(margin_output, label)
                loss = embedding_loss + penalty_loss + class_loss

                loss_dict_reduced = reduce_loss_dict(opt, {'embedding_loss': embedding_loss,
                                                           'penalty_loss': penalty_loss,
                                                           'class_loss': class_loss})
                losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                loss_value = losses_reduced.item()
                embedding_loss_value = loss_dict_reduced['embedding_loss'].item()
                penalty_loss_value = loss_dict_reduced['penalty_loss'].item()
                class_loss_value = loss_dict_reduced['class_loss'].item()

                # check the loss and skip invalid (infinite) losses
                inf = float("inf")
                if loss_value == inf or loss_value == -inf:
                    print("WARNING: received an inf loss, setting loss value to 0")
                    loss_value = 0
                    embedding_loss_value = 0
                    penalty_loss_value = 0
                    class_loss_value = 0
                    continue

                # perform backward, then check and update the gradients
                loss.backward()
                if utils.check_grad(model_1.parameters(), opt.clip_grad, opt.ignore_grad) or \
                        utils.check_grad(model_2.parameters(), opt.clip_grad, opt.ignore_grad):
                    if opt.main_proc:
                        logging.info('Not a finite gradient or too big, ignoring')
                    optimizer.zero_grad()
                    continue
                optimizer.step()
                total_iters += opt.num_gpus

                # update the losses for logging
                losses.update(loss_value)
                embedding_losses.update(embedding_loss_value)
                penalty_losses.update(penalty_loss_value)
                class_losses.update(class_loss_value)

                # print the performance on the training dataset and save the newest model
                if total_iters % opt.print_freq == 0:
                    scheduler.step(total_iters)
                    if opt.main_proc:
                        lr = scheduler.get_lr()
                        if isinstance(lr, list):
                            lr = max(lr)
                        logging.info('==> Train set steps {} lr: {:.6f}, loss: {:.4f} [ class: {:.4f}, embedding: {:.4f}, penalty_loss {:.4f}]'.format(
                            total_iters, lr, losses.avg, class_losses.avg, embedding_losses.avg, penalty_losses.avg))
                        if opt.distributed:
                            model_state_dict_1 = model_1.module.state_dict()
                            model_state_dict_2 = model_2.module.state_dict()
                            margin_state_dict = margin.module.state_dict()
                        else:
                            model_state_dict_1 = model_1.state_dict()
                            model_state_dict_2 = model_2.state_dict()
                            margin_state_dict = margin.state_dict()
                        state = {'state_dict_1': model_state_dict_1,
                                 'total_iters': total_iters,
                                 'state_dict_2': model_state_dict_2,
                                 'margin_state_dict': margin_state_dict}
                        filename = 'newest_model.pth'
                        if os.path.isfile(os.path.join(opt.model_dir, filename)):
                            shutil.copy(os.path.join(opt.model_dir, filename),
                                        os.path.join(opt.model_dir, 'newest_model.pth_bak'))
                        utils.save_checkpoint(state, opt.model_dir, filename=filename)

                # Validate the trained model
                if total_iters % opt.validate_freq == 0:
                    EER = union_evaluate(opt, model_1, model_2, val_loader, logging)
                    ##scheduler.step(EER)
                    if opt.main_proc and EER < best_perform_acc:
                        best_perform_acc = EER
                        print("Found better validated model (EER = %.3f), saving to model_best.pth" % (best_perform_acc))
                        if opt.distributed:
                            model_state_dict_1 = model_1.module.state_dict()
                            model_state_dict_2 = model_2.module.state_dict()
                            margin_state_dict = margin.module.state_dict()
                        else:
                            model_state_dict_1 = model_1.state_dict()
                            model_state_dict_2 = model_2.state_dict()
                            margin_state_dict = margin.state_dict()
                        state = {'state_dict_1': model_state_dict_1,
                                 'total_iters': total_iters,
                                 'state_dict_2': model_state_dict_2,
                                 'margin_state_dict': margin_state_dict}
                        filename = 'model_best.pth'
                        if os.path.isfile(os.path.join(opt.model_dir, filename)):
                            shutil.copy(os.path.join(opt.model_dir, filename),
                                        os.path.join(opt.model_dir, 'model_best.pth_bak'))
                        utils.save_checkpoint(state, opt.model_dir, filename=filename)
                    model_1.train()
                    model_2.train()
                    margin.train()
                    losses.reset()
                    class_losses.reset()
                    embedding_losses.reset()
                    penalty_losses.reset()

                if total_iters > opt.max_iters and opt.main_proc:
                    logging.info('finish training, steps is {}'.format(total_iters))
                    return model_1
def validate(loader, ds_rd, model, criterion, n_iter=-1, logger=None, opts=None, if_svVis=False, visualizer=None): ''' loop through loder, all res, get preds and gts and normled dist. With flip test for higher acc. for preds, bbs, jts_ori, jts_weigth out, recover preds_ori, dists_nmd, pckh( dist and joints_vis filter, , print, if_sv then save all these :param loader: :param ds_rd: the reader, givens the length and flip pairs :param model: :param criterion: :param optimizer: :param epoch: :param n_iter: :param logger: :param opts: :return: ''' batch_time = ut.AverageMeter() losses = ut.AverageMeter() acc = ut.AverageMeter() # switch to evaluate mode model.eval() num_samples = ds_rd.n_smpl n_jt = ds_rd.joint_num_ori # to accum rst preds_hm = [] bbs = [] li_joints_ori = [] li_joints_vis = [] li_l_std_ori = [] with torch.no_grad(): end = time.time() for i, inp_dct in enumerate(loader): # compute output input = inp_dct['pch'] target = inp_dct['hms'] target_weight = inp_dct['joints_vis'] bb = inp_dct['bb'] joints_ori = inp_dct['joints_ori'] l_std_ori = inp_dct['l_std_ori'] if i >= n_iter and n_iter > 0: # limiting iters break outputs = model(input) if isinstance(outputs, list): output = outputs[-1] else: output = outputs output_ori = output.clone() # original output of original image if opts.if_flipTest: input_flipped = input.flip(3).clone() # flipped input outputs_flipped = model(input_flipped) # flipped output if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped_ori = output_flipped.clone( ) # hm only head changed? not possible?? output_flipped = flip_back(output_flipped.cpu().numpy(), ds_rd.flip_pairs) output_flipped = torch.from_numpy( output_flipped.copy()).cuda() # N x n_jt xh x w tch # feature is not aligned, shift flipped heatmap for higher accuracy if_shiftHM = True # no idea why if if_shiftHM: # check original # print('run shift flip') output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 target = target.cuda(non_blocking=True) target_weight = target_weight.cuda(non_blocking=True) loss = criterion(output, target, target_weight) num_images = input.size(0) # measure accuracy and record loss losses.update(loss.item(), num_images) _, avg_acc, cnt, pred_hm = accuracy(output.cpu().numpy(), target.cpu().numpy()) acc.update(avg_acc, cnt) # preds can be furhter refined with subpixel trick, but it is already good enough. # measure elapsed time batch_time.update(time.time() - end) end = time.time() # keep rst preds_hm.append(pred_hm) # already numpy, 2D bbs.append(bb.numpy()) li_joints_ori.append(joints_ori.numpy()) li_joints_vis.append(target_weight.cpu().numpy()) li_l_std_ori.append(l_std_ori.numpy()) if if_svVis and 0 == i % opts.svVis_step: sv_dir = opts.vis_test_dir # exp/vis/Human36M # batch version mod0 = opts.mod_src[0] mean = ds_rd.means[mod0] std = ds_rd.stds[mod0] img_patch_vis = ut.ts2cv2(input[0], mean, std) # to CV BGR img_patch_vis_flipped = ut.ts2cv2(input_flipped[0], mean, std) # to CV BGR # pseudo change cm = getattr(cv2, ds_rd.dct_clrMap[mod0]) img_patch_vis = cv2.applyColorMap(img_patch_vis, cm) img_patch_vis_flipped = cv2.applyColorMap( img_patch_vis_flipped, cm) # original version get img from the ds_rd , different size , plot ing will vary from each other # warp preds to ori # draw and save with index. 
idx_test = i * opts.batch_size # image index skels_idx = ds_rd.skels_idx # get pred2d_patch pred2d_patch = np.ones((n_jt, 3)) # 3rd for vis pred2d_patch[:, :2] = pred_hm[0] / opts.out_shp[ 0] * opts.sz_pch[1] # only first vis.save_2d_skels( img_patch_vis, pred2d_patch, skels_idx, sv_dir, suffix='-' + mod0, idx=idx_test ) # make sub dir if needed, recover to test set index by indexing. # save the hm images. save flip test hm_ori = ut.normImg( output_ori[0].cpu().numpy().sum(axis=0)) # rgb one hm_flip = ut.normImg( output_flipped[0].cpu().numpy().sum(axis=0)) hm_flip_ori = ut.normImg( output_flipped_ori[0].cpu().numpy().sum(axis=0)) # subFd = mod0+'_hmFlip_ori' # vis.save_img(hm_flip_ori, sv_dir, idx_test, sub=subFd) # combined # img_cb = vis.hconcat_resize([img_patch_vis, hm_ori, img_patch_vis_flipped, hm_flip_ori]) # flipped hm # subFd = mod0+'_cbFlip' # vis.save_img(img_cb, sv_dir, idx_test, sub=subFd) if i % opts.print_freq == 0: msg = 'Test: [{0}/{1}]\t' \ 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( i, len(loader), batch_time=batch_time, loss=losses, acc=acc) logger.info(msg) preds_hm = np.concatenate(preds_hm, axis=0) # N x n_jt x 2 bbs = np.concatenate(bbs, axis=0) joints_ori = np.concatenate(li_joints_ori, axis=0) joints_vis = np.concatenate(li_joints_vis, axis=0) l_std_ori_all = np.concatenate(li_l_std_ori, axis=0) preds_ori = ut.warp_coord_to_original(preds_hm, bbs, sz_out=opts.out_shp) err_nmd = ut.distNorm(preds_ori, joints_ori, l_std_ori_all) ticks = np.linspace(0, 0.5, 11) # 11 ticks pck_all = ut.pck(err_nmd, joints_vis, ticks=ticks) # save to plain format for easy processing rst = { 'preds_ori': preds_ori.tolist(), 'joints_ori': joints_ori.tolist(), 'l_std_ori_all': l_std_ori_all.tolist(), 'err_nmd': err_nmd.tolist(), 'pck': pck_all.tolist() } return rst
criterion = nn.CrossEntropyLoss()
if cuda:
    model = model.cuda()
    criterion = criterion.cuda()

optim = torch.optim.SGD(model.parameters(), lr=train_cfg['lr'],
                        momentum=train_cfg['momentum'],
                        weight_decay=train_cfg['weight_decay'])
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optim, train_cfg['step_size'], gamma=train_cfg['gamma'], last_epoch=-1)
lr_scheduler = None

log = utils.print_log(configure['log_dir'], [net_cfg['type'], timestamp])
log.write(str(net_cfg))
log.write(str(train_cfg))

epoch_time = utils.AverageMeter()
batch_time = utils.AverageMeter()
data_time = utils.AverageMeter()
losses = utils.AverageMeter()
top1 = utils.AverageMeter()

# --------------------train & validation & save checkpoint---------------- #
epoch = 0
last_iteration = 0
print_freq = 1
best_top1 = 0
max_epoch = train_cfg['max_epoch']
print("train max epoch {0}".format(max_epoch))
for epoch in tqdm.trange(epoch, max_epoch, desc='Train', ncols=80):  # set progress bar width to 80
    # self.epoch = epoch
def train(opt, logging):
    ## Data Prepare ##
    if opt.main_proc:
        logging.info("Building dataset")
    train_dataset = DeepSpeakerUttDataset(opt, os.path.join(opt.dataroot, 'train'))
    if not opt.distributed:
        train_sampler = BucketingSampler(train_dataset, batch_size=opt.batch_size)
    else:
        train_sampler = DistributedBucketingSampler(train_dataset, batch_size=opt.batch_size,
                                                    num_replicas=opt.num_gpus, rank=opt.local_rank)
    train_loader = DeepSpeakerUttDataLoader(train_dataset, num_workers=opt.num_workers,
                                            batch_sampler=train_sampler)
    val_dataset = DeepSpeakerTestDataset(opt, os.path.join(opt.dataroot, 'test'))
    val_loader = DeepSpeakerTestDataLoader(val_dataset, batch_size=1, num_workers=opt.num_workers,
                                           shuffle=False, pin_memory=True)
    opt.in_size = train_dataset.in_size
    opt.out_size = train_dataset.class_nums
    print('opt.in_size {} opt.out_size {}'.format(opt.in_size, opt.out_size))
    if opt.main_proc:
        logging.info("Building dataset Succeeded")

    ## Building Model ##
    if opt.main_proc:
        logging.info("Building Model")
    model = model_select(opt)
    margin = margin_select(opt)
    if opt.resume:
        model, opt.total_iters = load(model, opt.resume, 'state_dict')
        margin, opt.total_iters = load(margin, opt.resume, 'margin_state_dict')

    # define optimizers for the different parameter groups
    criterion = torch.nn.CrossEntropyLoss().to(opt.device)
    if opt.optim_type == 'sgd':
        optimizer = optim.SGD([
            {'params': model.parameters(), 'weight_decay': 5e-4},
            {'params': margin.parameters(), 'weight_decay': 5e-4}
        ], lr=opt.lr, momentum=0.9, nesterov=True)
    elif opt.optim_type == 'adam':
        optimizer = optim.Adam([
            {'params': model.parameters(), 'weight_decay': 5e-4},
            {'params': margin.parameters(), 'weight_decay': 5e-4}
        ], lr=opt.lr, betas=(opt.beta1, 0.999))
    elif opt.optim_type == 'radam':
        optimizer = RAdam([
            {'params': model.parameters(), 'weight_decay': 5e-4},
            {'params': margin.parameters(), 'weight_decay': 5e-4}
        ], lr=opt.lr)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.1)

    model.to(opt.device)
    margin.to(opt.device)
    if opt.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.local_rank],
                                                          output_device=opt.local_rank)
        margin = torch.nn.parallel.DistributedDataParallel(margin, device_ids=[opt.local_rank],
                                                           output_device=opt.local_rank)
    if opt.main_proc:
        print(model)
        print(margin)
        logging.info("Building Model Succeeded")

    best_perform_eer = 1.0
    losses = utils.AverageMeter()
    acc = utils.AverageMeter()

    # Initial performance
    if opt.main_proc:
        EER = evaluate(opt, model, val_loader, logging)
        best_perform_eer = EER
        print('>>Start performance: EER = {}<<'.format(best_perform_eer))

    total_iters = opt.total_iters
    for epoch in range(1, opt.total_epoch + 1):
        train_sampler.shuffle(epoch)
        scheduler.step()

        # train model
        if opt.main_proc:
            logging.info('Train Epoch: {}/{} ...'.format(epoch, opt.total_epoch))
        model.train()
        margin.train()
        since = time.time()
        for i, (data) in enumerate(train_loader, start=0):
            utt_ids, inputs, targets = data
            inputs, label = inputs.to(opt.device), targets.to(opt.device)
            optimizer.zero_grad()

            raw_logits, attn, w, b = model(inputs)
            output = margin(raw_logits, label)
            # loss = criterion(output, label)
            loss = cal_loss(output, label, criterion, smoothing=opt.smoothing)

            loss_dict_reduced = reduce_loss_dict(opt, {'loss': loss})
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            loss_value = losses_reduced.item()

            # check the loss and skip invalid (infinite) losses
            inf = float("inf")
            if loss_value == inf or loss_value == -inf:
                print("WARNING: received an inf loss, setting loss value to 0")
                loss_value = 0
                continue

            loss.backward()
            if utils.check_grad(model.parameters(), opt.clip_grad, opt.ignore_grad):
                if opt.main_proc:
                    logging.info('Not a finite gradient or too big, ignoring')
                optimizer.zero_grad()
                continue
            optimizer.step()
            total_iters += opt.num_gpus

            losses.update(loss_value)

            # print train information
            if total_iters % opt.print_freq == 0 and opt.main_proc:
                # current training accuracy
                _, predict = torch.max(output.data, 1)
                total = label.size(0)
                correct = (np.array(predict.cpu()) == np.array(label.data.cpu())).sum()
                time_cur = (time.time() - since) / 100
                since = time.time()
                logging.info("Iters: {:0>6d}/[{:0>2d}], loss: {:.4f} ({:.4f}), train_accuracy: {:.4f}, time: {:.2f} s/iter, learning rate: {}".format(
                    total_iters, epoch, loss_value, losses.avg, correct / total, time_cur, scheduler.get_lr()[0]))

            # save model
            if total_iters % opt.save_freq == 0 and opt.main_proc:
                logging.info('Saving checkpoint: {}'.format(total_iters))
                if opt.distributed:
                    model_state_dict = model.module.state_dict()
                    margin_state_dict = margin.module.state_dict()
                else:
                    model_state_dict = model.state_dict()
                    margin_state_dict = margin.state_dict()
                state = {'state_dict': model_state_dict,
                         'margin_state_dict': margin_state_dict,
                         'total_iters': total_iters}
                filename = 'newest_model.pth'
                if os.path.isfile(os.path.join(opt.model_dir, filename)):
                    shutil.copy(os.path.join(opt.model_dir, filename),
                                os.path.join(opt.model_dir, 'newest_model.pth_bak'))
                utils.save_checkpoint(state, opt.model_dir, filename=filename)

            # Validate the trained model
            if total_iters % opt.validate_freq == 0:
                EER = evaluate(opt, model, val_loader, logging)
                ##scheduler.step(EER)
                if opt.main_proc and EER < best_perform_eer:
                    best_perform_eer = EER
                    logging.info("Found better validated model (EER = %.3f), saving to model_best.pth" % (best_perform_eer))
                    if opt.distributed:
                        model_state_dict = model.module.state_dict()
                        margin_state_dict = margin.module.state_dict()
                    else:
                        model_state_dict = model.state_dict()
                        margin_state_dict = margin.state_dict()
                    state = {'state_dict': model_state_dict,
                             'margin_state_dict': margin_state_dict,
                             'total_iters': total_iters}
                    filename = 'model_best.pth'
                    if os.path.isfile(os.path.join(opt.model_dir, filename)):
                        shutil.copy(os.path.join(opt.model_dir, filename),
                                    os.path.join(opt.model_dir, 'model_best.pth_bak'))
                    utils.save_checkpoint(state, opt.model_dir, filename=filename)
                model.train()
                margin.train()
                losses.reset()
def train(train_loader, model, optimizer, epoch, writer, logger, config): device = torch.device("cuda") if config.label_smooth > 0: criterion = CrossEntropyLabelSmooth(config.n_classes, config.label_smooth).to(device) else: criterion = nn.CrossEntropyLoss().to(device) top1 = utils.AverageMeter() top5 = utils.AverageMeter() losses = utils.AverageMeter() step_num = len(train_loader) cur_step = epoch * step_num cur_lr = optimizer.param_groups[0]['lr'] if config.local_rank == 0: logger.info("Train Epoch {} LR {}".format(epoch, cur_lr)) writer.add_scalar('train/lr', cur_lr, cur_step) model.train() for step, (X, y) in enumerate(train_loader): X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True) N = X.size(0) X, target_a, target_b, lam = data_utils.mixup_data(X, y, config.mixup_alpha, use_cuda=True) optimizer.zero_grad() logits, logits_aux = model(X) # loss = criterion(logits, y) loss = data_utils.mixup_criterion(criterion, logits, target_a, target_b, lam) if config.aux_weight > 0: # loss_aux = criterion(logits_aux, y) loss_aux = data_utils.mixup_criterion(criterion, logits_aux, target_a, target_b, lam) loss = loss + config.aux_weight * loss_aux if config.use_amp: from apex import amp with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() # gradient clipping nn.utils.clip_grad_norm_(model.module.parameters(), config.grad_clip) optimizer.step() prec1, prec5 = utils.accuracy(logits, y, topk=(1, 5)) if config.distributed: reduced_loss = utils.reduce_tensor(loss.data, config.world_size) prec1 = utils.reduce_tensor(prec1, config.world_size) prec5 = utils.reduce_tensor(prec5, config.world_size) else: reduced_loss = loss.data losses.update(reduced_loss.item(), N) top1.update(prec1.item(), N) top5.update(prec5.item(), N) torch.cuda.synchronize() if config.local_rank == 0 and (step % config.print_freq == 0 or step == step_num): logger.info( "Train: Epoch {:2d}/{} Step {:03d}/{:03d} Loss {losses.avg:.3f} " "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format( epoch + 1, config.epochs, step, step_num, losses=losses, top1=top1, top5=top5)) if config.local_rank == 0: writer.add_scalar('train/loss', reduced_loss.item(), cur_step) writer.add_scalar('train/top1', prec1.item(), cur_step) writer.add_scalar('train/top5', prec5.item(), cur_step) cur_step += 1 if config.local_rank == 0: logger.info("Train: Epoch {:2d}/{} Final Prec@1 {:.4%}".format( epoch + 1, config.epochs, top1.avg))
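# The NAS training loop above mixes inputs and targets with data_utils.mixup_data /
# mixup_criterion. A sketch of the standard mixup recipe (Zhang et al., 2018) with
# the interface the call sites above assume; the repository's data_utils may
# implement it differently (use_cuda is kept only for interface compatibility).
import numpy as np
import torch

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Returns mixed inputs, the two target sets, and the mixing coefficient lambda."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Convex combination of the losses against the two label sets."""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)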
def train(args, model, optimizer, criterion, dataloader_train, dataloader_val, writer, k_fold):
    best_pred, best_pre, best_rec, best_f1 = 0.0, 0.0, 0.0, 0.0
    best_epoch = 0
    step = 0
    train_loss = u.AverageMeter()
    top1_m = u.AverageMeter()
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    with open("./logs/%s.txt" % (args.model_name), "a") as f:
        print(current_time, file=f)

    for epoch in range(args.num_epochs):
        train_progressor = pb.Train_ProgressBar(mode='train', fold=k_fold, epoch=epoch,
                                                total_epoch=args.num_epochs,
                                                model_name=args.model_name,
                                                total=len(dataloader_train) * args.batch_size)
        lr = u.adjust_learning_rate(args, optimizer, epoch)
        model.train()
        for i, (data, label) in enumerate(dataloader_train):
            train_progressor.current = i * args.batch_size
            if torch.cuda.is_available() and args.use_gpu:
                data = data.cuda()
                label = label.cuda()
            pred = model(data)
            loss = criterion(pred, label)

            top1 = u.accuracy(pred, label)
            top1_m.update(top1[0], data.size(0))
            train_loss.update(loss.item(), data.size(0))
            train_progressor.current_loss = train_loss.avg
            train_progressor.current_lr = lr
            train_progressor.top1 = top1_m.avg

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_progressor()
            step += 1
            if step % 10 == 0:
                writer.add_scalar('Train/loss_step_{}'.format(int(k_fold)), loss, step)

        train_progressor.done()
        writer.add_scalar('Train/loss_epoch_{}'.format(int(k_fold)), float(train_loss.avg), epoch)

        Accuracy, Precision, Recall, F1 = val(args, model, criterion, dataloader_val, epoch, k_fold)
        writer.add_scalar('Valid/Accuracy_val_{}'.format(int(k_fold)), Accuracy, epoch)
        writer.add_scalar('Valid/Precision_val_{}'.format(int(k_fold)), Precision, epoch)
        writer.add_scalar('Valid/Recall_val_{}'.format(int(k_fold)), Recall, epoch)
        writer.add_scalar('Valid/F1_val_{}'.format(int(k_fold)), F1, epoch)

        is_best = Accuracy > best_pred
        if is_best:
            best_pred = max(best_pred, Accuracy)
            best_pre = max(best_pre, Precision)
            best_rec = max(best_rec, Recall)
            best_f1 = max(best_f1, F1)
            best_epoch = epoch + 1
        checkpoint_dir = os.path.join(args.save_model_path, str(k_fold))
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        checkpoint_latest_name = os.path.join(checkpoint_dir, 'checkpoint_latest.path.tar')
        # print(best_pred)
        u.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_dice': best_pred
            },
            best_pred, epoch, is_best, checkpoint_dir,
            filename=checkpoint_latest_name)

    # record all metrics from the best-accuracy epoch of this fold
    best_indicator_message = "f{} best pred in Epoch:{}\nAccuracy={} Precision={} Recall={} F1={}".format(
        k_fold, best_epoch, best_pred, best_pre, best_rec, best_f1)
    with open("./logs/%s_best_indicator.txt" % (args.model_name), mode='a') as f:
        print(best_indicator_message, file=f)
def main(): global args args = get_parser().parse_args() LOGGER.info(args) # Get input image size and save name list. # Each line of data_list should contain # image_0, image_1, (optional) ground truth, (optional) ground truth mask. with open(args.data_list, 'r') as file_list: fnames = file_list.readlines() assert len( fnames[0].strip().split(' ') ) == 2 + args.evaluate + args.evaluate * args.additional_flow_masks input_size = cv2.imread( os.path.join(args.data_root, fnames[0].split(' ')[0])).shape if args.visualize or args.save_inputs or args.save_refined: names = [l.strip().split(' ')[0].split('/')[-1] for l in fnames] sub_folders = [ l.strip().split(' ')[0][:-len(names[i])] for i, l in enumerate(fnames) ] names = [l.split('.')[0] for l in names] # Prepare data. mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] target_height, target_width = get_target_size(input_size[0], input_size[1]) transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) data = hd3data.HD3Data( mode='flow', data_root=args.data_root, data_list=args.data_list, label_num=args.evaluate + args.evaluate * args.additional_flow_masks, transform=transform, out_size=True) data_loader = torch.utils.data.DataLoader( data, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) # Setup models. model_hd3 = hd3model.HD3Model('flow', args.encoder, args.decoder, [4, 4, 4, 4, 4], args.context).cuda() model_hd3 = torch.nn.DataParallel(model_hd3).cuda() model_hd3.eval() refinement_network = PPacNet( args.kernel_size_preprocessing, args.kernel_size_joint, args.conv_specification, args.shared_filters, args.depth_layers_prob, args.depth_layers_guidance, args.depth_layers_joint) model_refine = refinement_models.EpeNet(refinement_network).cuda() model_refine = torch.nn.DataParallel(model_refine).cuda() model_refine.eval() # Load indicated models. name_hd3_model = args.model_hd3_path if os.path.isfile(name_hd3_model): checkpoint = torch.load(name_hd3_model) model_hd3.load_state_dict(checkpoint['state_dict']) LOGGER.info("Loaded HD3 checkpoint '{}'".format(name_hd3_model)) else: LOGGER.info("No checkpoint found at '{}'".format(name_hd3_model)) name_refinement_model = args.model_refine_path if os.path.isfile(name_refinement_model): checkpoint = torch.load(name_refinement_model) model_refine.load_state_dict(checkpoint['state_dict']) LOGGER.info( "Loaded refinement checkpoint '{}'".format(name_refinement_model)) else: LOGGER.info( "No checkpoint found at '{}'".format(name_refinement_model)) if args.evaluate: epe_hd3 = utils.AverageMeter() outliers_hd3 = utils.AverageMeter() epe_refined = utils.AverageMeter() outliers_refined = utils.AverageMeter() if args.visualize: visualization_folder = os.path.join(args.save_folder, 'visualizations') utils.check_makedirs(visualization_folder) if args.save_inputs: input_folder = os.path.join(args.save_folder, 'hd3_inputs') utils.check_makedirs(input_folder) if args.save_refined: refined_folder = os.path.join(args.save_folder, 'refined_flow') utils.check_makedirs(refined_folder) # Start inference. with torch.no_grad(): for i, (img_list, label_list, img_size) in enumerate(data_loader): if i % 10 == 0: LOGGER.info('Done with {}/{} samples'.format( i, len(data_loader))) img_size = img_size.cpu().numpy() img_list = [img.to(torch.device("cuda")) for img in img_list] label_list = [ label.to(torch.device("cuda")) for label in label_list ] # Resize input images. 
resized_img_list = [ torch.nn.functional.interpolate( img, (target_height, target_width), mode='bilinear', align_corners=True) for img in img_list ] # Get HD3 flow. output = model_hd3( img_list=resized_img_list, label_list=label_list, get_full_vect=True, get_full_prob=True, get_epe=args.evaluate) # Upscale flow to full resolution. for level, level_flow in enumerate(output['full_vect']): scale_factor = 1 / 2**(6 - level) output['full_vect'][level] = resize_dense_vector( level_flow * scale_factor, img_size[0, 1], img_size[0, 0]) hd3_flow = output['full_vect'][-1] # Evaluate HD3 output if required. if args.evaluate: epe_hd3.update( losses.endpoint_error(hd3_flow, label_list[0]).mean().data, hd3_flow.size(0)) outliers_hd3.update( losses.outlier_rate(hd3_flow, label_list[0]).mean().data, hd3_flow.size(0)) # Upscale and interpolate flow probabilities. probabilities = prob_utils.get_upsampled_probabilities_hd3( output['full_vect'], output['full_prob']) if args.save_inputs: save_hd3_inputs( hd3_flow, probabilities, input_folder, sub_folders[i * args.batch_size:(i + 1) * args.batch_size], names[i * args.batch_size:(i + 1) * args.batch_size]) continue # Refine flow with PPAC network. log_probabilities = prob_utils.safe_log(probabilities) output_refine = model_refine( hd3_flow, log_probabilities, img_list[0], label_list=label_list, get_loss=args.evaluate, get_epe=args.evaluate, get_outliers=args.evaluate) # Evaluate refined output if required if args.evaluate: epe_refined.update(output_refine['epe'].mean().data, hd3_flow.size(0)) outliers_refined.update(output_refine['outliers'].mean().data, hd3_flow.size(0)) # Save visualizations of optical flow if required. if args.visualize: refined_flow = output_refine['flow'] ground_truth = None if args.evaluate: ground_truth = label_list[0][:, :2] save_visualizations( hd3_flow, refined_flow, ground_truth, visualization_folder, sub_folders[i * args.batch_size:(i + 1) * args.batch_size], names[i * args.batch_size:(i + 1) * args.batch_size]) # Save refined optical flow if required. if args.save_refined: refined_flow = output_refine['flow'] save_refined_flow( refined_flow, refined_folder, sub_folders[i * args.batch_size:(i + 1) * args.batch_size], names[i * args.batch_size:(i + 1) * args.batch_size]) if args.evaluate: LOGGER.info( 'Accuracy of HD3 optical flow: ' 'AEE={epe_hd3.avg:.4f}, Outliers={outliers_hd3.avg:.4f}'.format( epe_hd3=epe_hd3, outliers_hd3=outliers_hd3)) if not args.save_inputs: LOGGER.info( 'Accuracy of refined optical flow: ' 'AEE={epe_refined.avg:.4f}, Outliers={outliers_refined.avg:.4f}' .format( epe_refined=epe_refined, outliers_refined=outliers_refined))
def run_train_epoch(model, optimizer, dataloader, epoch, trans_model, tree, supervision_opts, aligner, den, chain_opts, args): batch_time = utils.AverageMeter('Time', ':6.3f') losses = utils.AverageMeter('Loss', ':.4e') grad_norm = utils.AverageMeter('grad_norm', ':.4e') progress = utils.ProgressMeter(len(dataloader), batch_time, losses, grad_norm, prefix="Epoch: [{}]".format(epoch)) criterion = ops.ChainObjtiveFunction.apply end = time.time() for i, batch in enumerate(dataloader): feat = batch["x"] label = batch["y"] num_frs = batch["num_frs"] utt_ids = batch["utt_ids"] aux = batch["aux"] #word labels for se loss frame_shift = (epoch % supervision_opts.frame_subsampling_factor) * -1 x = feat.to(th.float32) x = th.roll(x, frame_shift, 1) x = x.unfold(1, 1, supervision_opts.frame_subsampling_factor).squeeze(-1) x = x.cuda() y = label.squeeze(2) loss = 0.0 prediction = model(x) for j in range(len(num_frs)): trans_ids = y[j, :num_frs[j]].tolist() phone_ali = aligner.to_phone_alignment(trans_ids) phones = list() durations = list() for item in phone_ali: phones.append(item[0]) durations.append(item[2]) proto_supervision = kaldi_chain.alignment_to_proto_supervision( supervision_opts, phones, durations) supervision = kaldi_chain.proto_supervision_to_supervision( tree, trans_model, proto_supervision, True) loglike_j = prediction[j, :supervision.frames_per_sequence, :] loss += criterion(loglike_j, den, supervision, chain_opts) optimizer.zero_grad() loss.backward() #update lr step = len(dataloader) * epoch + i + 1 lr = utils.noam_decay(step, args.warmup_steps, args.lr) for param_group in optimizer.param_groups: param_group['lr'] = lr # Gradient Clipping (th 5.0) norm = nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() grad_norm.update(norm) # update the loss tot_frs = np.array(num_frs).sum() losses.update(loss.item() / tot_frs) # measure the elapsed time batch_time.update(time.time() - end) # save model if hvd.rank() == 0 and i % args.save_freq == 0: checkpoint = {} checkpoint['model'] = model.state_dict() checkpoint['optimizer'] = optimizer.state_dict() output_file = args.exp_dir + '/chain.model.' + str(i) + '.tar' th.save(checkpoint, output_file) if hvd.rank() == 0 and i % args.print_freq == 0: progress.print(i)
def main(): # torch.manual_seed(args.seed) # torch.cuda.manual_seed_all(args.seed) # np.random.seed(args.seed) saver = Saver(args) # set log log_format = '%(asctime)s %(message)s' logging.basicConfig(level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p', filename=os.path.join(saver.experiment_dir, 'log.txt'), filemode='w') console = logging.StreamHandler() console.setLevel(logging.INFO) logging.getLogger().addHandler(console) if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) saver.create_exp_dir(scripts_to_save=glob.glob('*.py') + glob.glob('*.sh') + glob.glob('*.yml')) saver.save_experiment_config() summary = TensorboardSummary(saver.experiment_dir) writer = summary.create_summary() best_pred = 0 logging.info(args) device = torch.device('cuda') criterion = nn.CrossEntropyLoss() criterion = criterion.to(device) # # ''' Compute FLOPs and Params ''' # maml = Meta(args, criterion) # flops, params = get_model_complexity_info(maml.model, (84, 84), as_strings=False, print_per_layer_stat=True) # logging.info('FLOPs: {} MMac Params: {}'.format(flops / 10 ** 6, params)) # # maml = Meta(args, criterion).to(device) # tmp = filter(lambda x: x.requires_grad, maml.parameters()) # num = sum(map(lambda x: np.prod(x.shape), tmp)) # #logging.info(maml) # logging.info('Total trainable tensors: {}'.format(num)) # batch_size here means total episode number mini = MiniImagenet(args.data_path, mode='train', n_way=args.n_way, k_shot=args.k_spt, k_query=args.k_qry, batch_size=args.batch_size, resize=args.img_size, task_id=None) mini_test = MiniImagenet(args.data_path, mode='test', n_way=args.n_way, k_shot=args.k_spt, k_query=args.k_qry, batch_size=args.test_batch_size, resize=args.img_size, task_id=args.task_id) train_loader = DataLoader(mini, args.meta_batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True) test_loader = DataLoader(mini_test, args.meta_test_batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True) ''' Decoding ''' model = Network(args, args.init_channels, args.n_way, args.layers, criterion, pretrained=True).cuda() inner_optimizer_theta = torch.optim.SGD(model.arch_parameters(), lr=args.update_lr_theta) #inner_optimizer_theta = torch.optim.SGD(model.arch_parameters(), lr=100) inner_optimizer_w = torch.optim.SGD(model.parameters(), lr=args.update_lr_w) # load state dict pretrained_path = '/data2/dongzelian/NAS/meta_nas/run_meta_nas/mini-imagenet/meta-nas/experiment_21/model_best.pth.tar' pretrain_dict = torch.load(pretrained_path)['state_dict_w'] model_dict = {} state_dict = model.state_dict() for k, v in pretrain_dict.items(): if k[6:] in state_dict: model_dict[k[6:]] = v else: print(k) state_dict.update(model_dict) model.load_state_dict(state_dict) #model._arch_parameters = torch.load(pretrained_path)['state_dict_theta'] for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(test_loader): x_spt, y_spt, x_qry, y_qry = x_spt.squeeze(0).to(device), y_spt.squeeze(0).to(device), \ x_qry.squeeze(0).to(device), y_qry.squeeze(0).to(device) for k in range(args.update_step_test): logits = model(x_spt, alphas=model.arch_parameters()) loss = criterion(logits, y_spt) inner_optimizer_w.zero_grad() inner_optimizer_theta.zero_grad() loss.backward() inner_optimizer_w.step() inner_optimizer_theta.step() genotype = model.genotype() logging.info(genotype) 
maml = Meta_decoding(args, criterion, genotype).to(device) #exit() #print(step) #print(genotype) for epoch in range(args.epoch): logging.info('--------- Epoch: {} ----------'.format(epoch)) accs_all_train = [] # # TODO: how to choose batch data to update theta? # valid_iterator = iter(train_loader) batch_time = utils.AverageMeter() data_time = utils.AverageMeter() update_w_time = utils.AverageMeter() end = time.time() for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(train_loader): data_time.update(time.time() - end) x_spt, y_spt, x_qry, y_qry = x_spt.to(device), y_spt.to( device), x_qry.to(device), y_qry.to(device) # (x_search_spt, y_search_spt, x_search_qry, y_search_qry), valid_iterator = infinite_get(valid_iterator, train_loader) # x_search_spt, y_search_spt, x_search_qry, y_search_qry = x_search_spt.to(device), y_search_spt.to(device), x_search_qry.to(device), y_search_qry.to(device) accs, update_w_time = maml(x_spt, y_spt, x_qry, y_qry, update_w_time) accs_all_train.append(accs) batch_time.update(time.time() - end) end = time.time() writer.add_scalar('train/acc_iter', accs[-1], step + len(train_loader) * epoch) if step % args.report_freq == 0: logging.info( 'Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'W {update_w_time.val:.3f} ({update_w_time.avg:.3f})\t' 'training acc: {accs}'.format(epoch, step, len(train_loader), batch_time=batch_time, data_time=data_time, update_w_time=update_w_time, accs=accs)) if step % args.test_freq == 0: test_accs, test_stds, test_ci95 = meta_test( train_loader, test_loader, maml, device, epoch, writer) logging.info( '[Epoch: {}]\t Test acc: {}\t Test ci95: {}'.format( epoch, test_accs, test_ci95)) # Save the best meta model. new_pred = test_accs[-1] if new_pred > best_pred: is_best = True best_pred = new_pred else: is_best = False saver.save_checkpoint( { 'epoch': epoch + 1, 'state_dict': maml.module.state_dict() if isinstance( maml, nn.DataParallel) else maml.state_dict(), 'best_pred': best_pred, }, is_best)
def do_train(train_loader, model, criterion, optimizer, epoch, args): batch_time = utils.AverageMeter('Time', ':6.3f') data_time = utils.AverageMeter('Data', ':6.3f') losses = utils.AverageMeter('Loss', ':.3f') top1 = utils.AverageMeter('Acc@1', ':6.2f') top5 = utils.AverageMeter('Acc@5', ':6.2f') learning_rate = utils.AverageMeter('LR', ':.4f') throughputs = utils.AverageMeter('ThroughPut', ':.2f') losses_id = utils.AverageMeter('L_ID', ':.3f') losses_mag = utils.AverageMeter('L_mag', ':.6f') progress_template = [ batch_time, data_time, throughputs, 'images/s', losses, losses_id, losses_mag, top1, top5, learning_rate ] progress = utils.ProgressMeter(len(train_loader), progress_template, prefix="Epoch: [{}]".format(epoch)) end = time.time() # update lr learning_rate.update(current_lr) for i, (input, target) in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) global iters iters += 1 input = input.cuda(non_blocking=True) target = target.cuda(non_blocking=True) # compute output output, x_norm = model(input, target) loss_id, loss_g, one_hot = criterion(output, target, x_norm) loss = loss_id + args.lambda_g * loss_g # measure accuracy and record loss acc1, acc5 = utils.accuracy(args, output[0], target, topk=(1, 5)) losses.update(loss.item(), input.size(0)) top1.update(acc1[0], input.size(0)) top5.update(acc5[0], input.size(0)) losses_id.update(loss_id.item(), input.size(0)) losses_mag.update(args.lambda_g * loss_g.item(), input.size(0)) # compute gradient and do solver step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time duration = time.time() - end batch_time.update(duration) end = time.time() throughputs.update(args.batch_size / duration) if i % args.print_freq == 0: progress.display(i) debug_info(x_norm, args.l_a, args.u_a, args.l_margin, args.u_margin) if args.vis_mag: if (i > 10000) and (i % 100 == 0): x_norm = x_norm.detach().cpu().numpy() cos_theta = torch.masked_select( output[0], one_hot.bool()).detach().cpu().numpy() logit = torch.masked_select(F.softmax( output[0]), one_hot.bool()).detach().cpu().numpy() np.savez( '{}/vis/epoch_{}_iter{}'.format(args.pth_save_fold, epoch, i), x_norm, logit, cos_theta)
def run_train_epoch(model, optimizer, log_prior, dataloader, epoch, asr_decoder, trans_model, silence_ids, aligner, args): batch_time = utils.AverageMeter('Time', ':6.3f') losses = utils.AverageMeter('Loss', ':.4e') grad_norm = utils.AverageMeter('grad_norm', ':.4e') progress = utils.ProgressMeter(len(dataloader), batch_time, losses, grad_norm, prefix="Epoch: [{}]".format(epoch)) ce_criterion = nn.CrossEntropyLoss(ignore_index=-100, reduction='sum') if args.criterion == "mmi": criterion = ops.MMIFunction.apply else: criterion = ops.sMBRFunction.apply end = time.time() for i, batch in enumerate(dataloader): feat = batch["x"] label = batch["y"] num_frs = batch["num_frs"] utt_ids = batch["utt_ids"] aux = batch["aux"] #word labels for se loss x = feat.to(th.float32) y = label.long() x = x.cuda() y = y.cuda() prediction = model(x) ce_loss = ce_criterion(prediction.view(-1, prediction.shape[2]), y.view(-1)) loss = args.ce_ratio * ce_loss for j in range(len(num_frs)): loglike = prediction[j, :, :] loglike_j = loglike[:num_frs[j], :] loglike_j = loglike_j - log_prior text = th.from_numpy(aux[j][0][0].astype(int)).tolist() #text = ' '.join(str(k) for k in text) try: align_in = kaldi_matrix.Matrix( loglike_j.detach().cpu().numpy()) align_out = aligner.align(align_in, text) trans_ids = align_out["alignment"] if args.criterion == "mmi": se_loss = criterion(loglike_j, asr_decoder, trans_model, trans_ids) else: se_loss = criterion(loglike_j, asr_decoder, trans_model, trans_ids, args.criterion, silence_ids) loss += se_loss.cuda() except: print( "Warning: failed to align utterance {}, skip the utterance for SE loss" .format(utt_ids[j])) optimizer.zero_grad() loss.backward() # Gradient Clipping (th 5.0) norm = nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() grad_norm.update(norm) # update loss tot_frs = np.array(num_frs).sum() losses.update(loss.item() / tot_frs) # measure elapsed time batch_time.update(time.time() - end) # save model if hvd.rank() == 0 and i % args.save_freq == 0: checkpoint = {} checkpoint['model'] = model.state_dict() checkpoint['optimizer'] = optimizer.state_dict() output_file = args.exp_dir + '/model.se.' + str(i) + '.tar' th.save(checkpoint, output_file) if hvd.rank() == 0 and i % args.print_freq == 0: progress.print(i)
def do_train(train_loader, model, criterion, optimizer, grad_scaler, epoch, args): batch_time = utils.AverageMeter('Time', ':6.3f') data_time = utils.AverageMeter('Data', ':6.3f') losses = utils.AverageMeter('Loss', ':.3f') top1 = utils.AverageMeter('Acc@1', ':6.2f') learning_rate = utils.AverageMeter('LR', ':.4f') throughputs = utils.AverageMeter('ThroughPut', ':.2f') losses_id = utils.AverageMeter('L_ID', ':.3f') losses_mag = utils.AverageMeter('L_mag', ':.6f') progress_template = [batch_time, data_time, throughputs, 'images/s', losses, losses_id, losses_mag, top1, learning_rate] progress = utils.ProgressMeter( len(train_loader), progress_template, prefix="Epoch: [{}]".format(epoch)) end = time.time() # update lr learning_rate.update(current_lr) for i, (input, target) in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) global iters iters += 1 input = input.cuda(non_blocking=True) target = target.cuda(non_blocking=True) # compute output with autocast(enabled=args.amp_mode): output, x_norm = model(input, target) # x_norm is not needed to be gathered, as feature x is in each rank target = ts.distributed.gather(target, dim=0) # loss with autocast(enabled=args.amp_mode): loss_id, loss_g, one_hot = criterion(output, target, x_norm) loss = loss_id + args.lambda_g * loss_g # compute gradient and do solver step optimizer.zero_grad() # backward grad_scaler.scale(loss).backward() # update weights grad_scaler.step(optimizer) grad_scaler.update() # syn for logging torch.cuda.synchronize() # measure elapsed time if args.rank == 0: duration = time.time() - end end = time.time() batch_time.update(duration) bs = args.batch_size throughputs.update(args.world_size * bs / duration) # measure accuracy and record loss output = ts.distributed.gather(output[0], dim=-1) acc1, acc5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.item(), input.size(0)) top1.update(acc1[0], input.size(0)) losses_id.update(loss_id.item(), input.size(0)) losses_mag.update(args.lambda_g*loss_g.item(), input.size(0)) if i % args.print_freq == 0 and args.rank == 0: progress.display(i) debug_info(x_norm, args.l_a, args.u_a, args.l_margin, args.u_margin)
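# The mixed-precision update above follows the standard torch.cuda.amp pattern:
# forward under autocast, scale the loss, step through the scaler, then update it.
# A self-contained sketch of that ordering on a generic model (illustrative only;
# typical setup elsewhere would be scaler = GradScaler(enabled=True)):
import torch
import torch.nn as nn
from torch.cuda.amp import autocast, GradScaler

def amp_train_step(model, criterion, optimizer, scaler, input, target, enabled=True):
    optimizer.zero_grad()
    with autocast(enabled=enabled):
        output = model(input)
        loss = criterion(output, target)
    scaler.scale(loss).backward()   # backward on the scaled loss
    scaler.step(optimizer)          # unscales grads, skips the step on inf/nan
    scaler.update()                 # adjusts the loss scale for the next iteration
    return loss.detach()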
def train(train_loader, val_loader, model, criterion, optimizer, epoch, converter):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # Switch to train mode
    for p in model.parameters():
        p.requires_grad = True
    model.train()

    end = time.time()
    for i, sample in enumerate(
            tqdm(train_loader, desc='Train Epoch {}'.format(epoch + 1))):
        # Adjust learning rate
        #scheduler.step()

        # Measure data loading time
        data_time.update(time.time() - end)

        # Zero out gradients so we can accumulate new ones over batches
        optimizer.zero_grad()

        # Step 2. Get our input targets ready for the network.
        images, targets = sample
        batch_size = images.size(0)
        encoded_targets, target_lengths = converter.encode(targets)

        # Step 3. Run our forward pass.
        images = images.to(device)
        log_probs = model(images)
        input_lengths = torch.full((batch_size, ),
                                   log_probs.size(0),
                                   dtype=torch.int)

        # Step 4. Compute the loss, gradients, and update the parameters
        loss = criterion(log_probs, encoded_targets, input_lengths,
                         target_lengths) / batch_size
        losses.update(loss.item())
        loss.backward()
        optimizer.step()

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if (i + 1) % args.display_interval == 0 or i == 0 or (
                i + 1) == len(train_loader):
            print('\nTrain: [{}/{}]\t'
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t'
                  'Load Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t'
                  'Loss {loss.val:.6f} ({loss.avg:.6f})'.format(
                      i + 1,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses))

        # Evaluate on validation set
        val_acc = 0.0
        val_loss = 100000.0
        if (i + 1) % args.val_interval == 0 or (i + 1) == len(train_loader):
            with torch.no_grad():
                val_acc, val_loss = validate(val_loader, model, criterion,
                                             epoch, converter)
            for p in model.parameters():
                p.requires_grad = True
            model.train()

        # Remember best accuracy and save checkpoint
        global is_best, best_accuracy
        is_best = val_acc > 0.0 and val_acc >= best_accuracy
        best_accuracy = max(val_acc, best_accuracy)
        if (i + 1) % args.save_interval == 0 or (i + 1) == len(train_loader):
            save_checkpoint(
                {
                    'arch': args.arch,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_accuracy': best_accuracy,
                    'loss': val_loss,
                    'optimizer': optimizer.state_dict(),
                }, i + 1, is_best, args.checkpoint)

    return losses.avg
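# converter.encode above is assumed to turn a batch of text labels into the flattened
# (targets, target_lengths) pair expected by nn.CTCLoss, with index 0 reserved for the
# CTC blank. A minimal, hypothetical sketch of such a converter:
import torch

class LabelConverter(object):
    def __init__(self, alphabet):
        # index 0 is reserved for the CTC blank symbol
        self.char2idx = {ch: i + 1 for i, ch in enumerate(alphabet)}

    def encode(self, texts):
        lengths = torch.tensor([len(t) for t in texts], dtype=torch.int)
        flat = [self.char2idx[ch] for t in texts for ch in t]
        return torch.tensor(flat, dtype=torch.int), lengths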
def train(loader, ds_rd, model, criterion, optimizer, epoch, n_iter=-1, logger=None, opts=None, visualizer=None): ''' iter through epoch , return rst{'acc', loss'} each as list can be used outside for updating. :param loader: :param model: :param criterion: :param optimizer: :param epoch: for print infor :param n_iter: the iteration wanted, -1 for all iters :param opts: keep some additional controls :param visualizer: for visualizer :return: ''' batch_time = ut.AverageMeter() data_time = ut.AverageMeter() losses = ut.AverageMeter() acc = ut.AverageMeter() # switch to train mode model.train() end = time.time() li_loss = [] li_acc = [] for i, inp_dct in enumerate(loader): # get items if i >= n_iter and n_iter > 0: # break if iter is set and i is greater than that break input = inp_dct['pch'] target = inp_dct['hms'] # 14 x 64 x 1?? target_weight = inp_dct['joints_vis'] # measure data loading time weight, visible or not data_time.update(time.time() - end) # compute output outputs = model(input) # no need to cuda it? target = target.cuda(non_blocking=True) target_weight = target_weight.cuda(non_blocking=True) if isinstance(outputs, list): # list multiple stage version loss = criterion(outputs[0], target, target_weight) for output in outputs[1:]: loss += criterion(output, target, target_weight) else: output = outputs loss = criterion(output, target, target_weight) # compute gradient and do update step optimizer.zero_grad() loss.backward() optimizer.step() # measure accuracy and record loss losses.update(loss.item(), input.size(0)) _, avg_acc, cnt, pred = accuracy( output.detach().cpu().numpy(), target.detach().cpu().numpy() ) # hm directly, with normalize with 1/10 dim, pck0.5, cnt: n_smp, pred acc.update(avg_acc, cnt) # keep average acc if visualizer and 0 == i % opts.update_html_freq: # update current result, get vis dict n_jt = ds_rd.joint_num_ori mod0 = opts.mod_src[0] mean = ds_rd.means[mod0] std = ds_rd.stds[mod0] img_patch_vis = ut.ts2cv2( input[0], mean, std) # to CV BGR, mean std control channel detach inside # pseudo change cm = getattr(cv2, ds_rd.dct_clrMap[mod0]) img_patch_vis = cv2.applyColorMap(img_patch_vis, cm)[..., ::-1] # RGB # get pred pred2d_patch = np.ones((n_jt, 3)) # 3rd for vis pred2d_patch[:, :2] = pred[0] / opts.out_shp[0] * opts.sz_pch[1] img_skel = vis.vis_keypoints(img_patch_vis, pred2d_patch, ds_rd.skels_idx) hm_gt = target[0].cpu().detach().numpy().sum(axis=0) # HXW hm_gt = ut.normImg(hm_gt) hm_pred = output[0].detach().cpu().numpy().sum(axis=0) hm_pred = ut.normImg(hm_pred) img_cb = vis.hconcat_resize([img_skel, hm_gt, hm_pred]) vis_dict = {'img_cb': img_cb} visualizer.display_current_results(vis_dict, epoch, False) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % opts.print_freq == 0: msg = 'Epoch: [{0}][{1}/{2}]\t' \ 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 'Speed {speed:.1f} samples/s\t' \ 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( epoch, i, len(loader), batch_time=batch_time, speed=input.size(0) / batch_time.val, data_time=data_time, loss=losses, acc=acc) logger.info(msg) li_loss.append(losses.val) # the current loss li_acc.append(acc.val) return {'losses': li_loss, 'accs': li_acc}
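# The PCK-style accuracy above also returns predicted joint locations decoded from the
# output heatmaps; a common way to obtain them is a per-joint argmax. A hedged numpy
# sketch (illustrative; the real accuracy() may normalize or refine the peak location):
import numpy as np

def heatmaps_to_coords(heatmaps):
    """heatmaps: (N, J, H, W) -> coords (N, J, 2) as (x, y) of the peak response."""
    n, j, h, w = heatmaps.shape
    flat = heatmaps.reshape(n, j, -1)
    idx = flat.argmax(axis=2)
    coords = np.stack([idx % w, idx // w], axis=2).astype(np.float32)  # (x, y)
    return coords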
def train(args, model, optimizer, criterion, dataloader_train, dataloader_val, writer=None):
    best_pred, best_acc, best_jac, best_sen, best_spe = 0.0, 0.0, 0.0, 0.0, 0.0
    best_epoch = 0
    end_epoch = None
    step = 0  # for tensorboard logging
    end_index = None  # can be set to 1 to jump straight into validation for debugging
    current_time = datetime.now().strftime('%b%d %H:%M:%S')
    with open("./logs/%s.txt" % (args.net_work), "a") as f:
        print(current_time, file=f)
    for epoch in range(args.num_epochs):
        if epoch == end_epoch:
            break
        train_loss = u.AverageMeter()
        train_progressor = pb.Train_ProgressBar(mode='train', epoch=epoch,
                                                total_epoch=args.num_epochs,
                                                model_name=args.net_work,
                                                total=len(dataloader_train) * args.batch_size)
        lr = u.adjust_learning_rate(args, optimizer, epoch)
        model.train()
        for i, (data, label) in enumerate(dataloader_train):
            if i == end_index:
                break
            train_progressor.current = i * args.batch_size
            if torch.cuda.is_available() and args.use_gpu:
                data = data.cuda()
                label = label.cuda()
            output = model(data)
            output = torch.sigmoid(output)
            loss_aux = criterion[0](output, label)
            loss_main = criterion[1](output, label)
            loss = loss_main + loss_aux
            train_loss.update(loss.item(), data.size(0))
            train_progressor.current_loss = train_loss.avg
            train_progressor.current_lr = lr
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_progressor()
            step += 1
            if step % 10 == 0:
                writer.add_scalar('Train/loss_step', loss, step)
        train_progressor.done()
        writer.add_scalar('Train/loss_epoch', float(train_loss.avg), epoch)
        Dice, Acc, jaccard, Sensitivity, Specificity = val(args, model, dataloader_val)
        writer.add_scalar('Valid/Dice_val', Dice, epoch)
        writer.add_scalar('Valid/Acc_val', Acc, epoch)
        writer.add_scalar('Valid/Jac_val', jaccard, epoch)
        writer.add_scalar('Valid/Sen_val', Sensitivity, epoch)
        writer.add_scalar('Valid/Spe_val', Specificity, epoch)
        is_best = Dice > best_pred
        if is_best:
            best_pred = max(best_pred, Dice)
            best_jac = max(best_jac, jaccard)
            best_acc = max(best_acc, Acc)
            best_sen = max(best_sen, Sensitivity)
            best_spe = max(best_spe, Specificity)
            best_epoch = epoch + 1
        checkpoint_dir = os.path.join(args.save_model_path)
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        checkpoint_latest_name = os.path.join(checkpoint_dir, 'checkpoint_latest.pth.tar')
        u.save_checkpoint({
            'epoch': best_epoch,
            'state_dict': model.state_dict(),
            'best_dice': best_pred
        }, best_pred, epoch, is_best, checkpoint_dir, filename=checkpoint_latest_name)
    # Record all metrics from the epoch with the best segmentation result for this fold.
    best_indicator_message = ("best pred in Epoch:{}\nDice={:.4f} Accuracy={:.4f} jaccard={:.4f} "
                              "Sensitivity={:.4f} Specificity={:.4f}").format(
                                  best_epoch, best_pred, best_acc, best_jac, best_sen, best_spe)
    end_time = datetime.now().strftime('%b%d %H:%M:%S')
    with open("./logs/%s_best_indicator.txt" % (args.net_work), mode='a') as f:
        print("end time: " + end_time, file=f)
        print(best_indicator_message, file=f)
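# val() above reports Dice, accuracy, Jaccard, sensitivity and specificity on the
# validation set. A minimal sketch of the Dice coefficient it tracks, assuming
# binarized sigmoid outputs and binary masks (illustrative helper, not the project's val()):
import torch

def dice_coefficient(pred, target, threshold=0.5, eps=1e-6):
    """pred: sigmoid probabilities, target: binary mask; returns a scalar Dice score."""
    pred_bin = (pred > threshold).float()
    target = target.float()
    inter = (pred_bin * target).sum()
    return (2.0 * inter + eps) / (pred_bin.sum() + target.sum() + eps)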
print('Test: [{0}][{1}/{2}] - loss = {3} , acc = {4}'.format( epoch, i, len(val_loader), test_loss.avg, test_acc.avg)) net.train() print('Test finished.') return test_acc.avg, test_loss.avg ### main training loop ### best_accuracy = 0 best_epoch = 0 step = 0 for epoch in range(0, args.num_epochs): train_loss = utils.AverageMeter() train_acc = utils.AverageMeter() batch_time = utils.AverageMeter() data_time = utils.AverageMeter() # learning rate decay scheduler.step() end = time.time() # train for one epoch for i, data in enumerate(train_loader): states = net.init_hidden(is_train=True) if args.arch == 'Video' or args.arch == 'Audio': # single modality
def do_train(train_loader, model, criterion, optimizer, grad_scaler, epoch, args): batch_time = utils.AverageMeter('Time', ':6.3f') data_time = utils.AverageMeter('Data', ':6.3f') losses = utils.AverageMeter('Loss', ':.3f') top1 = utils.AverageMeter('Acc@1', ':6.2f') learning_rate = utils.AverageMeter('LR', ':.4f') throughputs = utils.AverageMeter('ThroughPut', ':.2f') losses_id = utils.AverageMeter('L_ID', ':.3f') losses_mag = utils.AverageMeter('L_mag', ':.6f') progress_template = [ batch_time, data_time, throughputs, 'images/s', losses, losses_id, losses_mag, top1, learning_rate ] progress = utils.ProgressMeter(len(train_loader), progress_template, prefix="Epoch: [{}]".format(epoch)) end = time.time() # update lr learning_rate.update(current_lr) for i, (input, target) in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) global iters iters += 1 input = input.cuda(non_blocking=True) target = target.cuda(non_blocking=True) # compute output with autocast(enabled=args.amp_mode): output, x_norm = model(input, target) # x_norm is not needed to be gathered, as feature x is in each rank target = mpu._gather(target, dim=0) # loss with autocast(enabled=args.amp_mode): loss_id, loss_g, one_hot = criterion(output, target, x_norm) loss = loss_id + args.lambda_g * loss_g * args.world_size # compute gradient and do solver step optimizer.zero_grad() # backward grad_scaler.scale(loss).backward() # update weights grad_scaler.step(optimizer) grad_scaler.update() # syn for logging torch.cuda.synchronize() # measure elapsed time if args.rank == 0: duration = time.time() - end end = time.time() batch_time.update(duration) bs = args.batch_size throughputs.update(args.world_size * bs / duration) # measure accuracy and record loss acc1, _ = mpu.accuracy(args, output, target, topk=(1, 1)) losses.update(loss.item(), input.size(0)) top1.update(acc1[0], input.size(0)) losses_id.update(loss_id.item(), input.size(0)) losses_mag.update(args.lambda_g * loss_g.item(), input.size(0)) if i % args.print_freq == 0 and args.rank == 0: progress.display(i) debug_info(x_norm, args.l_a, args.u_a, args.l_margin, args.u_margin) if args.vis_mag: if (epoch == args.epochs - 1) and (i % 1000 == 0): one_hot = one_hot.bool() mask = torch.sum(one_hot, dim=1).bool() x_norm_cur_rank = torch.masked_select( x_norm.squeeze(), mask).detach().cpu().numpy() cos_theta_cur_rank = torch.masked_select( output[0], one_hot).detach().cpu().numpy() np.savez( '{}/vis/epoch_{}_iter{}_rank_{}'.format( args.pth_save_fold, epoch, i, args.rank), x_norm_cur_rank, cos_theta_cur_rank)
def run_train_epoch(model, optimizer, log_prior, dataloader, epoch, asr_decoder, trans_model, silence_ids, args): batch_time = utils.AverageMeter('Time', ':6.3f') losses = utils.AverageMeter('Loss', ':.4e') grad_norm = utils.AverageMeter('grad_norm', ':.4e') progress = utils.ProgressMeter(len(dataloader), batch_time, losses, grad_norm, prefix="Epoch: [{}]".format(epoch)) ce_criterion = nn.CrossEntropyLoss(ignore_index=-100, reduction='sum') if args.criterion == "mmi": se_criterion = ops.MMIFunction.apply else: se_criterion = ops.sMBRFunction.apply end = time.time() for i, batch in enumerate(dataloader, 0): feat = batch["x"] label = batch["y"] #pdf-ids for ce loss num_frs = batch["num_frs"] utt_ids = batch["utt_ids"] aux = batch["aux"] #trans_ids for se loss x = feat.to(th.float32) y = label.long() x = x.cuda() y = y.cuda() prediction = model(x) ce_loss = ce_criterion(prediction.view(-1, prediction.shape[2]), y.view(-1)) se_loss = 0.0 for j in range(len(num_frs)): log_like_j = prediction[j, :, :] log_like_j = log_like_j[:num_frs[j], :] log_like_j = log_like_j - log_prior #trans_id = label[j, :num_frs[j], 0].tolist() trans_id = th.from_numpy(aux[j][0][0].astype(int)).tolist() # print(len(trans_id), num_frs[j]) if args.criterion == "mmi": se_loss += se_criterion(log_like_j, asr_decoder, trans_model, trans_id) else: se_loss += se_criterion(log_like_j, asr_decoder, trans_model, trans_id, args.criterion, silence_ids) loss = se_loss.cuda() + args.ce_ratio * ce_loss optimizer.zero_grad() loss.backward() # Gradient Clipping (th 5.0) norm = nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() grad_norm.update(norm) # update loss tot_frs = np.array(num_frs).sum() losses.update(loss.item() / tot_frs) # measure elapsed time batch_time.update(time.time() - end) # save model if hvd.rank() == 0 and i % args.save_freq == 0: checkpoint = {} checkpoint['model'] = model.state_dict() checkpoint['optimizer'] = optimizer.state_dict() output_file = args.exp_dir + '/model.se.' + str(i) + '.tar' th.save(checkpoint, output_file) if hvd.rank() == 0 and i % args.print_freq == 0: progress.print(i)
def train_epoch(self):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()

    self.model.train()
    self.optim.zero_grad()

    end = time.time()
    for batch_idx, (imgs, target, img_files, class_ids) in tqdm.tqdm(
            enumerate(self.train_loader),
            total=len(self.train_loader),
            desc='Train epoch={}, iter={}'.format(self.epoch, self.iteration),
            ncols=80,
            leave=False):
        iteration = batch_idx + self.epoch * len(self.train_loader)
        data_time.update(time.time() - end)
        gc.collect()

        if self.iteration != 0 and (iteration - 1) != self.iteration:
            continue  # for resuming
        self.iteration = iteration

        if (self.iteration + 1) % self.interval_validate == 0:
            self.validate()

        if self.cuda:
            imgs, target = imgs.cuda(), target.cuda(non_blocking=True)
        # pdb.set_trace()

        output, out1, out2, out3, out4 = self.model(imgs)
        loss = self.criterion(output, target)
        if np.isnan(float(loss.item())):
            raise ValueError('loss is nan while training')

        # measure accuracy and record loss
        prec1, prec5 = utils.accuracy(output.data, target.data, topk=(1, 5))
        losses.update(loss.item(), imgs.size(0))
        top1.update(prec1[0], imgs.size(0))
        top5.update(prec5[0], imgs.size(0))

        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if self.iteration % self.print_freq == 0:
            log_str = 'Train: [{0}/{1}/{top1.count:}]\tepoch: {epoch:}\titer: {iteration:}\t' \
                      'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Data: {data_time.val:.3f} ({data_time.avg:.3f})\t' \
                      'Loss: {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Prec@1: {top1.val:.3f} ({top1.avg:.3f})\t' \
                      'Prec@5: {top5.val:.3f} ({top5.avg:.3f})\tlr {lr:.6f}'.format(
                          batch_idx, len(self.train_loader), epoch=self.epoch,
                          iteration=self.iteration,
                          lr=self.optim.param_groups[0]['lr'],
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1, top5=top5)
            torch.save(self.model.state_dict(), self.model_dict)
            print(log_str)
            self.print_log(log_str)

    if self.lr_scheduler is not None:
        self.lr_scheduler.step()  # update lr

    log_str = 'Train_summary: [{0}/{1}/{top1.count:}]\tepoch: {epoch:}\titer: {iteration:}\t' \
              'Time: {batch_time.avg:.3f}\tData: {data_time.avg:.3f}\t' \
              'Loss: {loss.avg:.4f}\tPrec@1: {top1.avg:.3f}\tPrec@5: {top5.avg:.3f}\tlr {lr:.6f}'.format(
                  batch_idx, len(self.train_loader), epoch=self.epoch,
                  iteration=self.iteration, lr=self.optim.param_groups[0]['lr'],
                  batch_time=batch_time, data_time=data_time,
                  loss=losses, top1=top1, top5=top5)
    print(log_str)
    self.print_log(log_str)
def train(self):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    D1 = utils.AverageMeter()
    EPE = utils.AverageMeter()

    # switch to train mode
    self.model.train()
    time_end = time.time()
    for i, (batch, filenames) in enumerate(self.dataloader_train):
        assert batch.shape[1] >= 7
        if self.use_cuda:
            batch = batch[:, :7].cuda()
        imL = batch[:, :3]
        imR = batch[:, 3:6]
        dispL = batch[:, 6:7]

        # measure data loading time
        data_time.update(time.time() - time_end)

        # compute output
        scale_dispLs, dispLs = self.model(imL, imR)

        # compute loss
        argst = {
            "disp_gt": dispL,
            "disps": dispLs,
            "scale_disps": scale_dispLs,
            "flag_smooth": True,
        }
        loss = self.lossfun(argst)
        losses.update(loss.item(), imL.size(0))

        # if(i < 5):
        #     # visualize images
        #     import matplotlib.pyplot as plt
        #     row, col = 4, 3
        #     plt.subplot(row, col, 1); plt.imshow(imL[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 2); plt.imshow(imR[0].data.cpu().numpy().transpose(1, 2, 0))
        #     plt.subplot(row, col, 3); plt.imshow(dispL[0, 0].data.cpu().numpy())
        #     for i in range(len(dispLs)):
        #         plt.subplot(row, col, 4+i); plt.imshow(dispLs[i][0, 0].data.cpu().numpy())
        #     plt.show()

        # compute gradient and do SGD step
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # measure accuracy
        d1, epe = self.accuracy(dispLs[0].data, dispL.data)
        D1.update(d1, imL.size(0))
        EPE.update(epe, imL.size(0))

        # measure elapsed time
        batch_time.update(time.time() - time_end)
        time_end = time.time()

        # log every print_freq iterations
        if i % self.args.print_freq == 0:  # default=20
            print('Train: [{0}][{1}/{2}] | '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) | '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) | '
                  'Loss {loss.val:.4f} ({loss.avg:.4f}) | '
                  'D1 {D1.val:.3f} ({D1.avg:.3f}) | '
                  'EPE {EPE.val:.3f} ({EPE.avg:.3f})'.format(
                      self.epoch, i, len(self.dataloader_train),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, D1=D1, EPE=EPE))

    msg = 'mean train loss: %.3f | mean D1: %.3f | mean EPE: %.3f' % (
        losses.avg, D1.avg, EPE.avg)
    logging.info(msg)
    return losses.avg, EPE.avg, D1.avg
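# self.accuracy above returns the D1 error rate and end-point error (EPE) used for
# stereo evaluation. A sketch following the usual KITTI definition (a pixel counts as
# an outlier when its error exceeds 3 px and 5% of the ground-truth disparity);
# illustrative only, the class's own accuracy() may mask invalid pixels differently.
import torch

def disparity_metrics(disp_pred, disp_gt, max_disp=192):
    valid = (disp_gt > 0) & (disp_gt < max_disp)
    if valid.sum() == 0:
        return 0.0, 0.0
    err = (disp_pred - disp_gt).abs()[valid]
    gt = disp_gt[valid]
    epe = err.mean().item()
    d1 = ((err > 3.0) & (err > 0.05 * gt)).float().mean().item()
    return d1, epe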
def validate(self):
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()

    training = self.model.training
    self.model.eval()

    end = time.time()
    for batch_idx, (imgs, target, img_files, class_ids) in tqdm.tqdm(
            enumerate(self.val_loader),
            total=len(self.val_loader),
            desc='Valid iteration={} epoch={}'.format(self.iteration, self.epoch),
            ncols=80,
            leave=False):
        with torch.no_grad():
            gc.collect()
            if self.cuda:
                imgs, target = imgs.cuda(), target.cuda(non_blocking=True)
            # pdb.set_trace()

            output, out1, out2, out3, out4 = self.model(imgs)
            loss = self.criterion(output, target)
            if np.isnan(float(loss.item())):
                raise ValueError('loss is nan while validating')

            # measure accuracy and record loss
            prec1, prec5 = utils.accuracy(output.data, target.data, topk=(1, 5))
            losses.update(loss.item(), imgs.size(0))
            top1.update(prec1[0], imgs.size(0))
            top5.update(prec5[0], imgs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % self.print_freq == 0:
            log_str = 'Test: [{0}/{1}/{top1.count:}]\tepoch: {epoch:}\titer: {iteration:}\t' \
                      'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss: {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Prec@1: {top1.val:.3f} ({top1.avg:.3f})\t' \
                      'Prec@5: {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                          batch_idx, len(self.val_loader), epoch=self.epoch,
                          iteration=self.iteration, batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5)
            print(log_str)
            self.print_log(log_str)

    if self.cmd == 'train':
        is_best = top1.avg > self.best_top1
        self.best_top1 = max(top1.avg, self.best_top1)
        self.best_top5 = max(top5.avg, self.best_top5)

        log_str = 'Test_summary: [{0}/{1}/{top1.count:}] epoch: {epoch:} iter: {iteration:}\t' \
                  'BestPrec@1: {best_top1:.3f}\tBestPrec@5: {best_top5:.3f}\t' \
                  'Time: {batch_time.avg:.3f}\tLoss: {loss.avg:.4f}\t' \
                  'Prec@1: {top1.avg:.3f}\tPrec@5: {top5.avg:.3f}\t'.format(
                      batch_idx, len(self.val_loader), epoch=self.epoch,
                      iteration=self.iteration, best_top1=self.best_top1,
                      best_top5=self.best_top5, batch_time=batch_time,
                      loss=losses, top1=top1, top5=top5)
        print(log_str)
        self.print_log(log_str)

        checkpoint_file = os.path.join(self.checkpoint_dir, 'checkpoint.pth.tar')
        torch.save(
            {
                'epoch': self.epoch,
                'iteration': self.iteration,
                'arch': self.model.__class__.__name__,
                'optim_state_dict': self.optim.state_dict(),
                'model_state_dict': self.model.state_dict(),
                'best_top1': self.best_top1,
                'batch_time': batch_time,
                'losses': losses,
                'top1': top1,
                'top5': top5,
            }, checkpoint_file)
        if is_best:
            shutil.copy(checkpoint_file,
                        os.path.join(self.checkpoint_dir, 'model_best.pth.tar'))
        if (self.epoch + 1) % 10 == 0:  # save every 10 epochs
            shutil.copy(
                checkpoint_file,
                os.path.join(self.checkpoint_dir,
                             'checkpoint-{}.pth.tar'.format(self.epoch)))

    if training:
        self.model.train()