def train(train_loader, model, criterion, optimizer, epoch, logger):
    """Train `model` for one epoch using the SID ordinal-regression loss.

    The ground-truth depth is discretized via utils.get_labels_sid before the
    ordinal criterion is applied; predictions are mapped back to continuous
    depth via utils.get_depth_sid for metric evaluation.

    Args:
        train_loader: iterable of (input, target) batches (moved to CUDA here).
        model: network whose forward returns (pred_d, pred_ord).
        criterion: ordinal loss comparing pred_ord to the discretized target.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index; used to derive the global logging step.
        logger: writer exposing add_scalar(tag, value, step).

    Returns:
        The epoch-average metrics object from AverageMeter.average()
        (previously computed but discarded).
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    batch_num = len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        # NOTE(review): detect_anomaly is a debugging aid that markedly slows
        # every forward/backward pass; disable once NaN issues are resolved.
        with torch.autograd.detect_anomaly():
            pred_d, pred_ord = model(input)  # note: model has two outputs
            target_c = utils.get_labels_sid(
                args, target)  # using sid, discretize the groundtruth
            loss = criterion(pred_ord, target_c)
            optimizer.zero_grad()
            loss.backward()  # compute gradient and do SGD step
            optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        depth = utils.get_depth_sid(args, pred_d)
        result.evaluate(depth.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                      epoch, i + 1, len(train_loader), data_time=data_time,
                      gpu_time=gpu_time, Loss=loss.item(), result=result,
                      average=average_meter.average()))
            current_step = epoch * batch_num + i
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)

    avg = average_meter.average()
    # Previously `avg` was computed and discarded; returning it is
    # backward-compatible (callers that ignored the old None still work).
    return avg
def validate(val_loader, model, epoch, logger):
    """Evaluate `model` on `val_loader` and log epoch-level metrics.

    Also assembles a comparison image grid of up to 8 evenly spaced samples
    and saves it under `output_directory` as comparison_<epoch>.png.

    Args:
        val_loader: iterable of (input, target) batches (moved to CUDA here).
        model: network whose forward returns (pred, _).
        epoch: epoch index used for the saved-image filename and the log step.
        logger: writer exposing add_scalar(tag, value, step).

    Returns:
        (avg, img_merge): epoch-average metrics and the merged comparison
        image (None if the loader was empty).
    """
    average_meter = AverageMeter()
    model.eval()  # switch to evaluate mode
    end = time.time()
    # Save images every `skip` iters. Clamp to 1 so short loaders
    # (len(val_loader) < 9 would give skip == 0) cannot cause a
    # modulo-by-zero in the `i % skip` test below.
    skip = max(len(val_loader) // 9, 1)
    img_merge = None  # stays None if the loop body never runs
    for i, (input, target) in enumerate(val_loader):
        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute output
        end = time.time()
        with torch.no_grad():
            pred, _ = model(input)
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        pred = utils.get_depth_sid(args, pred)
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        # save 8 images for visualization
        rgb = input
        if i == 0:
            img_merge = utils.merge_into_row(rgb, target, pred)
        elif (i < 8 * skip) and (i % skip == 0):
            row = utils.merge_into_row(rgb, target, pred)
            img_merge = utils.add_row(img_merge, row)
        elif i == 8 * skip:
            filename = output_directory + '/comparison_' + str(epoch) + '.png'
            utils.save_image(img_merge, filename)

        if (i + 1) % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                      i + 1, len(val_loader), gpu_time=gpu_time,
                      result=result, average=average_meter.average()))

    avg = average_meter.average()
    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'Rel={average.absrel:.3f}\n'
          'Log10={average.lg10:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'Delta2={average.delta2:.3f}\n'
          'Delta3={average.delta3:.3f}\n'
          't_GPU={time:.3f}\n'.format(average=avg, time=avg.gpu_time))

    logger.add_scalar('Test/rmse', avg.rmse, epoch)
    logger.add_scalar('Test/Rel', avg.absrel, epoch)
    logger.add_scalar('Test/log10', avg.lg10, epoch)
    logger.add_scalar('Test/Delta1', avg.delta1, epoch)
    logger.add_scalar('Test/Delta2', avg.delta2, epoch)
    logger.add_scalar('Test/Delta3', avg.delta3, epoch)
    return avg, img_merge
def train(train_loader, model, criterion, optimizer, epoch, logger, device,
          opts, iteration):
    """Train for one epoch (device-aware variant with mid-epoch resume).

    Batches with index < `iteration` are skipped so training can resume from
    a checkpoint saved mid-epoch; a checkpoint is written every
    opts.save_freq batches.

    Args:
        train_loader: iterable of (input, target) batches.
        model: network whose forward returns (pred_d, pred_ord).
        criterion: ordinal loss comparing pred_ord to the discretized target.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index; used to derive the global logging step.
        logger: writer exposing add_scalar(tag, value, step).
        device: torch device that input/target batches are moved to.
        opts: options namespace (save_freq, print_freq, SID parameters, ...).
        iteration: number of already-processed batches to skip on resume.

    Returns:
        The epoch-average metrics object from AverageMeter.average()
        (previously computed but discarded).
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    batch_num = len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        if i < iteration:
            # Resuming mid-epoch: reset the timer while skipping, otherwise
            # the first real batch would report the entire skip phase as
            # data-loading time.
            end = time.time()
            continue
        input = input.to(device)
        target = target.to(device)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        # NOTE(review): detect_anomaly is a debugging aid that markedly slows
        # every forward/backward pass; disable once NaN issues are resolved.
        with torch.autograd.detect_anomaly():
            pred_d, pred_ord = model(input)
            target_c = utils.get_labels_sid(
                opts, target, device)  # using sid, discretize the groundtruth
            loss = criterion(pred_ord, target_c)
            optimizer.zero_grad()
            loss.backward()  # compute gradient and do SGD step
            optimizer.step()
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        depth = utils.get_depth_sid(opts, pred_d, device)
        result.evaluate(depth.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % opts.save_freq == 0:
            # NOTE(review): best_result is read from module scope -- confirm
            # it is initialized before training starts.
            utils.save_checkpoint(
                {
                    'args': opts,
                    'epoch': epoch,
                    'model': model,
                    'best_result': best_result,
                    'optimizer': optimizer,
                    'iteration': i + 1
                }, False, epoch, output_directory)

        if (i + 1) % opts.print_freq == 0:
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                      epoch, i + 1, len(train_loader), data_time=data_time,
                      gpu_time=gpu_time, Loss=loss.item(), result=result,
                      average=average_meter.average()))
            current_step = epoch * batch_num + i
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)
            if torch.cuda.is_available():
                # Guard the CUDA-only calls (the original left
                # memory_allocated() unguarded, inconsistent with every other
                # CUDA call in this loop and unsafe on CPU-only builds).
                print("GPU:", torch.cuda.memory_allocated())
                torch.cuda.empty_cache()

    avg = average_meter.average()
    # Previously `avg` was computed and discarded; returning it is
    # backward-compatible (callers that ignored the old None still work).
    return avg