def main():
    global args, best_result, output_directory

    # set random seed
    torch.manual_seed(args.manual_seed)
    torch.cuda.manual_seed(args.manual_seed)
    np.random.seed(args.manual_seed)
    random.seed(args.manual_seed)

    print("Let's use GPU ", torch.cuda.current_device())
    val_loader = create_loader(args)
    output_directory = utils.get_output_directory(args)

    checkpoint_path = '/share2/public/fail_safe/kitti/DeepBlur/result/kitti/run_7/model_best.pth.tar'
    print("=> loading checkpoint '{}'".format(checkpoint_path))
    checkpoint = torch.load(checkpoint_path)  # solve 'out of memory'
    # the checkpoint stores the full model object
    model = checkpoint['model']
    print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    test(val_loader, model)

    # clear memory
    del checkpoint
    torch.cuda.empty_cache()
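# `create_loader` and `test` are defined elsewhere in this script. One note on the
# 'out of memory' workaround above: checkpoints that pickle the whole model are
# deserialized onto the GPU they were saved from. A minimal sketch (the function
# name is illustrative, not from the repo) that avoids this by mapping tensors to
# CPU first and moving the model over explicitly:
def load_checkpoint_cpu_first(path):
    checkpoint = torch.load(path, map_location='cpu')  # keep deserialized tensors off the GPU
    model = checkpoint['model']
    return model.cuda(), checkpoint['epoch']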
def main():
    global args, best_result, output_directory, train_csv, test_csv

    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    out_channels = 1

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')
    train_dataset = NYUDataset(traindir, type='train',
                               modality=args.modality, sparsifier=sparsifier)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True, sampler=None)
    # set batch size to 1 for validation
    val_dataset = NYUDataset(valdir, type='val',
                             modality=args.modality, sparsifier=sparsifier)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
            "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return
    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    # create a new model
    else:
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder, in_channels=in_channels,
                           out_channels=out_channels, pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        # create new csv files with only the header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                    "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.mse, result.rmse, result.absrel, result.lg10,
                        result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
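# utils.adjust_learning_rate used in the loop above is not shown in this file.
# A minimal sketch, assuming the usual step-decay schedule (the decay factor and
# step size are assumptions, not taken from the repo):
def adjust_learning_rate_sketch(optimizer, epoch, init_lr, decay=0.1, step=5):
    lr = init_lr * (decay ** (epoch // step))  # decay the base lr every `step` epochs
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr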
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return
    elif args.crossTrain:
        print("Retraining loaded model on current input parameters")
        train_loader, val_loader = create_data_loaders(args)
        checkpoint = torch.load(args.crossTrain)
        model = checkpoint['model']
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        model = model.cuda()
    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True
    # create a new model
    else:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

    # model = torch.nn.DataParallel(model).cuda()  # for multi-gpu training
    model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only the header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                    "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.mse, result.rmse, result.absrel, result.lg10,
                        result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
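# utils.save_checkpoint is also defined elsewhere. A minimal sketch, assuming the
# "save every epoch, alias the best" convention implied by the model_best.pth.tar
# file that the evaluate branches load (the per-epoch file name is an assumption):
import os
import shutil
import torch

def save_checkpoint_sketch(state, is_best, epoch, output_directory):
    checkpoint_filename = os.path.join(output_directory,
                                       'checkpoint-{}.pth.tar'.format(epoch))
    torch.save(state, checkpoint_filename)  # one checkpoint per epoch
    if is_best:
        # keep a stable alias pointing at the best-performing epoch
        shutil.copyfile(checkpoint_filename,
                        os.path.join(output_directory, 'model_best.pth.tar'))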
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # if more than one GPU is available, train on all of them
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use", torch.cuda.current_device())

    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return
    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(args.resume))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True
    # create a new model
    else:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({})".format(args.arch))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = models.resnet50(pretrained=True)
        elif args.arch == 'resnet18':
            model = models.resnet18(pretrained=True)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

    # for multi-gpu training
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    elif args.criterion == 'berHu':
        criterion = criteria.berHuLoss().cuda()

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')

    log_path = os.path.join(output_directory, 'logs',
                            datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch, logger)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch, logger)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nrmse={:.3f}\nrml={:.3f}\nlog10={:.3f}\nDelta1={:.3f}\n"
                    "Delta2={:.3f}\nDelta3={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.rmse, result.absrel, result.lg10, result.delta1,
                        result.delta2, result.delta3, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
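# criteria.berHuLoss is selected above but not shown. A sketch of the reverse
# Huber (berHu) loss commonly used for depth regression, masked to valid pixels;
# the 0.2 * max threshold is the usual heuristic and an assumption here:
import torch
import torch.nn as nn

class BerHuLossSketch(nn.Module):
    def forward(self, pred, target):
        mask = (target > 0).detach()            # supervise only valid depth pixels
        diff = (pred - target)[mask].abs()
        c = (0.2 * diff.max()).clamp(min=1e-6)  # adaptive L1/L2 switch point
        quadratic = (diff ** 2 + c ** 2) / (2 * c)
        # L1 below the threshold, scaled L2 above it
        return torch.where(diff <= c, diff, quadratic).mean()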
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode
    if args.evaluate:
        # Data loading code
        print("=> creating data loaders...")
        valdir = os.path.join('..', 'data', args.data, 'val')
        if args.data == 'nyudepthv2':
            from dataloaders.nyu import NYUDataset
            val_dataset = NYUDataset(valdir, split='val', modality=args.modality)
        else:
            raise RuntimeError('Dataset not found.')
        # set batch size to 1 for validation
        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                                 num_workers=args.workers, pin_memory=True)
        print("=> data loaders created.")

        assert os.path.isfile(args.evaluate), \
            "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        validate(val_loader, model, args.start_epoch, write_to_file=False)
        return

    start_epoch = 0
    if args.train:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        model = models.MobileNetSkipAdd(output_size=train_loader.dataset.output_size)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

    # model = torch.nn.DataParallel(model).cuda()  # for multi-gpu training
    model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only the header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                    "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.mse, result.rmse, result.absrel, result.lg10,
                        result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
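# `fieldnames` is a module-level global shared by these scripts. A plausible
# definition, inferred from the metrics written to best.txt (the exact column set
# is an assumption), plus the matching row append used inside train/validate:
import csv

fieldnames = ['mse', 'rmse', 'absrel', 'lg10', 'mae',
              'delta1', 'delta2', 'delta3', 'data_time', 'gpu_time']

def append_result_row(csv_path, result):
    with open(csv_path, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({'mse': result.mse, 'rmse': result.rmse,
                         'absrel': result.absrel, 'lg10': result.lg10,
                         'mae': result.mae, 'delta1': result.delta1,
                         'delta2': result.delta2, 'delta3': result.delta3,
                         'data_time': result.data_time, 'gpu_time': result.gpu_time})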
def main():
    global args, best_result, output_directory

    # set random seed
    torch.manual_seed(args.manual_seed)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use GPU ", torch.cuda.current_device())

    train_loader, val_loader = create_loader(args)

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        optimizer = checkpoint['optimizer']
        # model_dict = checkpoint['model'].module.state_dict()  # to load a model trained on multiple GPUs
        # model = FCRN.ResNet(output_size=train_loader.dataset.output_size, pretrained=False)
        # model.load_state_dict(model_dict)
        model = checkpoint['model']  # solve 'out of memory'
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

        # clear memory
        del checkpoint
        # del model_dict
        torch.cuda.empty_cache()
    else:
        print("=> creating Model")
        model = FCRN.ResNet(output_size=train_loader.dataset.output_size)
        print("=> model created.")
        start_epoch = 0

        # different modules get different learning rates
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
        optimizer = torch.optim.SGD(train_params, lr=args.lr, momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    # You can use DataParallel() whether you use multiple GPUs or not
    model = nn.DataParallel(model).cuda()

    # when training, use ReduceLROnPlateau to reduce the learning rate
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=args.lr_patience)

    # loss function
    criterion = criteria.MaskedL1Loss()

    # create directory path
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')
    config_txt = os.path.join(output_directory, 'config.txt')

    # write training parameters to the config file
    if not os.path.exists(config_txt):
        with open(config_txt, 'w') as txtfile:
            args_ = vars(args)
            args_str = ''
            for k, v in args_.items():
                args_str = args_str + str(k) + ':' + str(v) + ',\t\n'
            txtfile.write(args_str)

    # create log
    log_path = os.path.join(output_directory, 'logs',
                            datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    for epoch in range(start_epoch, args.epochs):
        # record the current learning rate of each parameter group
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
            logger.add_scalar('Lr/lr_' + str(i), old_lr, epoch)

        train(train_loader, model, criterion, optimizer, epoch, logger)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch, logger)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}, rmse={:.3f}, rml={:.3f}, log10={:.3f}, d1={:.3f}, d2={:.3f}, "
                    "d3={:.3f}, t_gpu={:.4f}".format(
                        epoch, result.rmse, result.absrel, result.lg10, result.delta1,
                        result.delta2, result.delta3, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # save a checkpoint every epoch
        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)

        # when absrel stops falling, reduce the learning rate
        scheduler.step(result.absrel)

    logger.close()
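# FCRN.ResNet.get_1x_lr_params / get_10x_lr_params above are not shown. The usual
# split trains the pretrained backbone at the base lr and the freshly initialized
# decoder at 10x; a sketch (the attribute names below are assumptions):
def get_1x_lr_params_sketch(model):
    # pretrained encoder layers: train gently at the base learning rate
    for module in [model.conv1, model.bn1, model.layer1,
                   model.layer2, model.layer3, model.layer4]:
        for p in module.parameters():
            yield p

def get_10x_lr_params_sketch(model):
    # randomly initialized decoder: benefits from a larger learning rate
    for p in model.decoder.parameters():
        yield p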
def main():
    print('Testing data on ' + args.camera + '!')
    assert args.data == 'nyudepthv2', '=> only nyudepthv2 available at this point'

    to_tensor = transforms.ToTensor()
    assert not (args.camera == 'webcam' and not args.modality == 'rgb'), \
        '=> webcam only accepts an RGB model'

    output_directory = utils.get_output_directory(args)
    best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
    assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(best_model_filename)
    print("=> loading best model '{}'".format(best_model_filename))
    checkpoint = torch.load(best_model_filename)
    args.start_epoch = checkpoint['epoch']
    model = checkpoint['model']
    model.eval()

    switch = True
    if args.camera == 'kinect':
        kinect = PyKinectRuntime.PyKinectRuntime(
            PyKinectV2.FrameSourceTypes_Color | PyKinectV2.FrameSourceTypes_Depth)
        counter = 0
        assert kinect._sensor is not None, '=> No Kinect device detected!'
        while True:
            if kinect.has_new_color_frame() and kinect.has_new_depth_frame():
                bgra_frame = kinect.get_last_color_frame()
                bgra_frame = bgra_frame.reshape(
                    (kinect.color_frame_desc.Height, kinect.color_frame_desc.Width, 4),
                    order='C')
                rgb_frame = cv2.cvtColor(bgra_frame, cv2.COLOR_BGRA2RGB)
                depth_frame = kinect.get_last_depth_frame()
                merged_image, rmse = depth_estimate(model, rgb_frame, depth_frame, save=False)
                merged_image_bgr = cv2.cvtColor(merged_image.astype('uint8'), cv2.COLOR_RGB2BGR)
                switch = False
                cv2.imshow('my webcam', merged_image_bgr.astype('uint8'))
                if counter == 15:
                    print('RMSE = ' + str(rmse))
                counter = counter + 1
                if counter == 16:
                    counter = 0
            if cv2.waitKey(1) == 27:
                break
    elif args.camera == 'webcam':
        cam = cv2.VideoCapture(0)
        while True:
            ret_val, img = cam.read()
            img = cv2.flip(img, 1)
            rgb = cv2.cvtColor(np.array(img), cv2.COLOR_BGR2RGB)  # webcam frames are 3-channel BGR
            transform = transforms.Compose([transforms.Resize([228, 304])])
            # Resize expects a PIL image (assumes `from PIL import Image` at module level)
            rgb_image = transform(Image.fromarray(rgb))
            if args.modality == 'rgbd':
                raise RuntimeError("=> can't test webcam with depth information!")
            rgb_np = np.asfarray(rgb_image, dtype='float') / 255
            input_tensor = to_tensor(rgb_np)
            while input_tensor.dim() < 4:
                input_tensor = input_tensor.unsqueeze(0)
            input_tensor = input_tensor.cuda()
            torch.cuda.synchronize()
            end = time.time()
            with torch.no_grad():
                pred = model(input_tensor)
            torch.cuda.synchronize()
            gpu_time = time.time() - end

            pred_depth = np.squeeze(pred.cpu().numpy())
            d_min = np.min(pred_depth)
            d_max = np.max(pred_depth)
            pred_color_map = color_map(pred_depth, d_min, d_max, plt.cm.viridis)
            merged_image = np.hstack([rgb_image, pred_color_map])
            merged_image_bgr = cv2.cvtColor(merged_image.astype('uint8'), cv2.COLOR_RGB2BGR)
            cv2.imshow('my webcam', merged_image_bgr.astype('uint8'))
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    else:
        file_name = args.kinectdata + '.p'
        pickle_path = os.path.join('CameraData', file_name)
        print(pickle_path)
        assert os.path.isfile(pickle_path), '=> no data found at ' + pickle_path
        with open(pickle_path, 'rb') as f:
            pickle_file = pickle.load(f)
        bgr_frame = pickle_file['rgb']
        depth = pickle_file['depth']
        rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        merged_image, rmse = depth_estimate(model, rgb_frame, depth, save=True, switch=True)
        plt.figure('Merged Image')
        plt.imshow(merged_image.astype('uint8'))
        plt.show()
        print('RMSE = ' + str(rmse))
    cv2.destroyAllWindows()
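# color_map() used above is not defined in this file. A minimal sketch matching
# how it is called (normalize the depth map, run it through a matplotlib
# colormap, and return an RGB image scaled to [0, 255]):
import numpy as np

def color_map_sketch(depth, d_min, d_max, cmap):
    rel = (depth - d_min) / (d_max - d_min + 1e-8)  # normalize to [0, 1]
    return 255 * cmap(rel)[:, :, :3]                # drop the alpha channel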
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # random seed setting
    torch.manual_seed(16)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Data loading code
    print("=> creating data loaders...")
    data_dir = '/media/vasp/Data2/Users/vmhp806/depth-estimation'
    valdir = os.path.join(data_dir, 'data', args.data, 'val')
    traindir = os.path.join(data_dir, 'data', args.data, 'train')
    if args.data == 'nyu' or args.data == 'uow_dataset':
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(valdir, split='val', modality=args.modality)
        # val_dataset = nc.SafeDataset(val_dataset)
        train_dataset = NYUDataset(traindir, split='train', modality=args.modality)
        # train_dataset = nc.SafeDataset(train_dataset)
    else:
        raise RuntimeError('Dataset not found.')

    # set batch size to 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                             num_workers=args.workers, pin_memory=True,
                                             collate_fn=my_collate)
    if not args.evaluate:
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                                   shuffle=False, num_workers=args.workers,
                                                   pin_memory=True, collate_fn=my_collate)
    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        if args.predict:
            predict(val_loader, model, output_directory)
        else:
            validate(val_loader, model, args.start_epoch, write_to_file=False)
        return
    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        # args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        # the lr here is a placeholder; it is overwritten by the loaded optimizer state
        optimizer = torch.optim.SGD(model.parameters(), lr=0.9)
        optimizer.load_state_dict(checkpoint['optimizer'])
        # optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        args.resume = True
    else:
        print("=> creating Model ({} - {}) ...".format(args.arch, args.decoder))
        # in_channels = len(args.modality)
        if args.arch == 'mobilenet-skipconcat':
            model = models.MobileNetSkipConcat(decoder=args.decoder,
                                               output_size=train_loader.dataset.output_size)
        elif args.arch == 'mobilenet-skipadd':
            model = models.MobileNetSkipAdd(decoder=args.decoder,
                                            output_size=train_loader.dataset.output_size)
        elif args.arch == 'resnet18-skipconcat':
            model = models.ResNetSkipConcat(layers=18, decoder=args.decoder,
                                            output_size=train_loader.dataset.output_size)
        elif args.arch == 'resnet18-skipadd':
            model = models.ResNetSkipAdd(layers=18,
                                         output_size=train_loader.dataset.output_size)
        else:
            raise Exception('Invalid architecture')
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        # model = torch.nn.DataParallel(model).cuda()  # for multi-gpu training
        model = model.cuda()
        start_epoch = 0

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only the header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # start_epoch = 0
    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch,
                                     write_to_file=True)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                    "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.mse, result.rmse, result.absrel, result.lg10,
                        result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            # 'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer.state_dict(),
        }, is_best, epoch, output_directory)
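# my_collate above is not shown; together with the commented-out nc.SafeDataset
# wrappers it suggests individual samples can fail and come back as None. A
# common sketch that drops failed samples before the default collate:
from torch.utils.data.dataloader import default_collate

def my_collate_sketch(batch):
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch)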
def main():
    torch.cuda.set_device(config.cuda_id)
    global args, best_result, output_directory, train_csv, test_csv, batch_num, best_txt

    best_result = Result()
    best_result.set_to_worst()
    batch_num = 0
    output_directory = utils.get_output_directory(args)

    # --------------------------------------------------------- #
    # pytorch version compatibility shim for older checkpoints   #
    # --------------------------------------------------------- #
    try:
        torch._utils._rebuild_tensor_v2
    except AttributeError:
        def _rebuild_tensor_v2(storage, storage_offset, size, stride,
                               requires_grad, backward_hooks):
            tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
            tensor.requires_grad = requires_grad
            tensor._backward_hooks = backward_hooks
            return tensor
        torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # append the run configuration to the experiment log
    # (`namefile` is expected to be defined at module level)
    nowTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    with open(namefile, 'a+') as file:
        file.writelines("====================================================" + str(nowTime) + '\n')
        file.writelines("Cuda_id: " + str(config.cuda_id) + '\n')
        file.writelines("NAME: " + str(config.name) + '\n')
        file.writelines("Description: " + str(config.description) + '\n')
        file.writelines("model: " + str(args.arch) + '\n' +
                        "loss_final: " + str(args.criterion) + '\n' +
                        "loss_1: " + str(config.LOSS_1) + '\n' +
                        "batch_size: " + str(args.batch_size) + '\n')
        file.writelines("zoom_scale: " + str(config.zoom_scale) + '\n')
        file.writelines("------------------------" + '\n')
        file.writelines("Train_dataset: " + str(config.train_dir) + '\n')
        file.writelines("Validation_dataset: " + str(config.val_dir) + '\n')
        file.writelines("------------------------" + '\n')
        file.writelines("Input_type: " + str(config.input) + '\n')
        file.writelines("target_type: " + str(config.target) + '\n')
        file.writelines("LOSS--------------------" + '\n')
        file.writelines("Loss_num: " + str(config.loss_num) + '\n')
        file.writelines("loss_final: " + str(args.criterion) + '\n' +
                        "loss_1: " + str(config.LOSS_1) + '\n')
        file.writelines("loss_0_weight: " + str(config.LOSS_0_weight) + '\n' +
                        "loss_1_weight: " + str(config.LOSS_1_weight) + '\n')
        file.writelines("weight_GT_canny: " + str(config.weight_GT_canny_loss) + '\n' +
                        "weight_GT_sobel: " + str(config.weight_GT_sobel_loss) + '\n' +
                        "weight_rgb_sobel: " + str(config.weight_rgb_sobel_loss) + '\n')
        file.writelines("------------------------" + '\n')
        file.writelines("target: " + str(config.target) + '\n')
        file.writelines("data_loader_type: " + str(config.data_loader) + '\n')
        file.writelines("lr: " + str(config.Init_lr) + '\n')
        file.writelines("save_fc: " + str(config.save_fc) + '\n')
        file.writelines("Max epoch: " + str(config.epoch) + '\n')

    # define the loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    elif args.criterion == 'l1_canny':
        criterion = criteria.MaskedL1_cannyLoss().cuda()
    # SOBEL
    elif args.criterion == 'l1_from_rgb_sobel':
        criterion = criteria.MaskedL1_from_rgb_sobel_Loss().cuda()
    elif args.criterion == 'l1_from_GT_rgb_sobel':
        criterion = criteria.MaskedL1_from_GT_rgb_sobel_Loss().cuda()
    elif args.criterion == 'l1_from_GT_sobel':
        criterion = criteria.MaskedL1_from_GT_sobel_Loss().cuda()
    elif args.criterion == 'l2_from_GT_sobel_Loss':
        criterion = criteria.MaskedL2_from_GT_sobel_Loss().cuda()
    # CANNY
    elif args.criterion == 'l1_canny_from_GT_canny':
        criterion = criteria.MaskedL1_canny_from_GT_Loss().cuda()

    # Data loading code
    print("=> creating data loaders ...")
    train_dir = config.train_dir
    val_dir = config.val_dir
    train_dataset = YT_dataset(train_dir, config, is_train_set=True)
    val_dataset = YT_dataset(val_dir, config, is_train_set=False)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, sampler=None,
        # worker_init_fn ensures different sampling patterns for each data loading worker
        worker_init_fn=lambda work_id: np.random.seed(work_id))
    # set batch size to 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    print("=> data loaders created.")

    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
            "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], 1, write_to_file=False)
        return
    elif args.test:
        print("testing...")
        best_model_filename = best_model_dir
        assert os.path.isfile(best_model_filename), \
            "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        optimizer = checkpoint['optimizer']
        # move the loaded optimizer state onto the GPU
        for state in optimizer.state.values():
            for k, v in state.items():
                # print(type(v))
                if torch.is_tensor(v):
                    state[k] = v.cuda()
        # test(val_loader, model, checkpoint['epoch'], write_to_file=False)
        test(model)
        return
    elif args.resume:
        assert os.path.isfile(config.resume_model_dir), \
            "=> no checkpoint found at '{}'".format(config.resume_model_dir)
        print("=> loading checkpoint '{}'".format(config.resume_model_dir))
        best_model_filename = config.resume_model_dir
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        # move the loaded optimizer state onto the chosen GPU
        for state in optimizer.state.values():
            for k, v in state.items():
                # print(type(v))
                if torch.is_tensor(v):
                    state[k] = v.cuda(config.cuda_id)
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    else:
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        # in_channels depends on the input type
        if config.input == 'RGBT':
            in_channels = 4
        elif config.input == 'YT':
            in_channels = 2
        else:
            print("Input type is wrong !")
            return 0

        # instantiate the chosen architecture. All ResNet-style variants share the
        # same constructor signature, so they are dispatched through a lookup table
        # instead of one elif branch per variant (behavior unchanged).
        resnet_variants = {
            'resnet50': (ResNet, 50),
            'resnet50_deconv1_loss0': (ResNet_with_deconv, 50),
            'resnet50_deconv1_loss1': (ResNet_with_deconv_loss, 50),
            'resnet50_direct_deconv1_loss1': (ResNet_with_direct_deconv, 50),
            'resnet50_1': (ResNet_1, 50),
            'resnet50_2': (ResNet_2, 50),
            'resnet50_3': (ResNet_3, 50),
            'resnet50_3_1': (ResNet_3_1, 50),
            'resnet50_3_2': (ResNet_3_2, 50),
            'resnet50_3_3': (ResNet_3_3, 50),
            'resnet50_4': (ResNet_4, 50),
            'resnet50_5': (ResNet_5, 50),
            'resnet50_7': (ResNet_7, 50),
            'resnet50_8': (ResNet_8, 50),
            'resnet50_9': (ResNet_9, 50),
            'resnet50_10': (ResNet_10, 50),
            'resnet50_11': (ResNet_11, 50),
            'resnet50_11_1': (ResNet_11_1, 50),
            'resnet50_11_without_pretrain': (ResNet_11_without_pretrain, 50),
            'resnet50_12': (ResNet_12, 50),
            'resnet50_13': (ResNet_13, 50),
            'resnet50_14': (ResNet_14, 50),
            'resnet50_15': (ResNet_15, 50),
            'resnet50_16': (ResNet_16, 50),
            'resnet50_17': (ResNet_17, 50),
            'resnet50_18': (ResNet50_18, 50),
            'resnet50_30': (ResNet_30, 50),
            'resnet50_31': (ResNet_31, 50),
            'resnet50_32': (ResNet_32, 50),
            'resnet50_33': (ResNet_33, 50),
            'resnet50_40': (ResNet_40, 50),
            'resnet50_15_1': (ResNet_15_1, 50),
            'resnet50_15_2': (ResNet_15_2, 50),
            'resnet50_15_3': (ResNet_15_3, 50),
            'resnet50_15_4': (ResNet_15_4, 50),
            'resnet50_15_5': (ResNet_15_5, 50),
            'resnet50_15_6': (ResNet_15_6, 50),
            'resnet50_15_8': (ResNet_15_8, 34),  # this variant is built with layers=34
            'resnet50_15_9': (ResNet_15_9, 50),
            'resnet50_15_10': (ResNet_15_10, 50),
            'resnet50_15_11': (ResNet_15_11, 50),
            'resnet50_15_12': (ResNet_15_12, 50),
            'resnet18': (ResNet, 18),
            'ResNet_bicubic': (ResNet_bicubic, 50),
        }
        # models that take no shared constructor arguments
        simple_variants = {
            'UNet': UNet,
            'UP_only': UP_only,
            'VDSR': VDSR,
            'VDSR_without_res': VDSR_without_res,
            'VDSR_16': VDSR_16,
            'VDSR_16_2': VDSR_16_2,
            'Leon_resnet50': Leon_resnet50,
            'Leon_resnet101': Leon_resnet101,
            'Leon_resnet18': Leon_resnet18,
            'Double_resnet50': Double_resnet50,
        }
        if args.arch in resnet_variants:
            cls, layers = resnet_variants[args.arch]
            model = cls(layers=layers, decoder=args.decoder,
                        output_size=train_dataset.output_size,
                        in_channels=in_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet50_20':
            model = ResNet50_20(Bottleneck, [3, 4, 6, 3])
        elif args.arch in simple_variants:
            model = simple_variants[args.arch]()
        print("=> model created.")

    if args.finetune:
        print("=============== loading finetune model =====================")
        assert os.path.isfile(config.fitune_model_dir), \
            "=> no checkpoint found at '{}'".format(config.fitune_model_dir)
        print("=> loading checkpoint '{}'".format(config.fitune_model_dir))
        best_model_filename = config.fitune_model_dir
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch'] + 1
        # best_result = checkpoint['best_result']
        model_fitune = checkpoint['model']
        model_fitune_dict = model_fitune.state_dict()
        model_dict = model.state_dict()
        # copy every parameter whose name also exists in the new model
        for k in model_fitune_dict:
            if k in model_dict:
                # print("There is model k: ", k)
                model_dict[k] = model_fitune_dict[k]
        # model_dict = {k: v for k, v in model_fitune_dict.items() if k in model_dict}
        # model_dict.update(model_fitune_dict)  # would reintroduce unmatched keys
        model.load_state_dict(model_dict)
        # optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True,
                                 weight_decay=args.weight_decay)
    '''
    optimizer = torch.optim.Adam(
        [
            # {'params': model.base.parameters()},
            # 3
            {'params': model.re_conv_Y_1.parameters(), 'lr': 0.0001},
            {'params': model.re_conv_Y_2.parameters(), 'lr': 0.0001},
            {'params': model.re_conv_Y_3.parameters(), 'lr': 0.0001},
            # 3
            {'params': model.re_deconv_up0.parameters(), 'lr': 0.0001},
            {'params': model.re_deconv_up1.parameters(), 'lr': 0.0001},
            {'params': model.re_deconv_up2.parameters(), 'lr': 0.0001},
            # 3
            {'params': model.re_conv1.parameters(), 'lr': 0.0001},
            {'params': model.re_bn1.parameters(), 'lr': 0.0001},
            {'params': model.re_conv4.parameters(), 'lr': 0.0001},
            # 5
            {'params': model.re_ResNet50_layer1.parameters(), 'lr': 0.0001},
            {'params': model.re_ResNet50_layer2.parameters(), 'lr': 0.0001},
            {'params': model.re_ResNet50_layer3.parameters(), 'lr': 0.0001},
            {'params': model.re_ResNet50_layer4.parameters(), 'lr': 0.0001},
            {'params': model.re_bn2.parameters(), 'lr': 0.0001},
            # 5
            {'params': model.re_deconcv_res_up1.parameters(), 'lr': 0.0001},
            {'params': model.re_deconcv_res_up2.parameters(), 'lr': 0.0001},
            {'params': model.re_deconcv_res_up3.parameters(), 'lr': 0.0001},
            {'params': model.re_deconcv_res_up4.parameters(), 'lr': 0.0001},
            {'params': model.re_deconv_last.parameters(), 'lr': 0.0001},
            # denoise net 3
            {'params': model.conv_denoise_1.parameters(), 'lr': 0},
            {'params': model.conv_denoise_2.parameters(), 'lr': 0},
            {'params': model.conv_denoise_3.parameters(), 'lr': 0}
        ],
        lr=args.lr, amsgrad=True, weight_decay=args.weight_decay)
    '''
    for state in optimizer.state.values():
        for k, v in state.items():
            # print(type(v))
            if torch.is_tensor(v):
                state[k] = v.cuda(config.cuda_id)
    print(optimizer)

    # create new csv files with only the header
    with open(train_csv, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
    with open(test_csv, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

    # writer = SummaryWriter(log_dir='logs')
    model = model.cuda(config.cuda_id)
    # torch.save(model, './net1.pkl')
    for state in optimizer.state.values():
        for k, v in state.items():
            # print(type(v))
            if torch.is_tensor(v):
                state[k] = v.cuda()
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        train(train_loader, val_loader, model, criterion, optimizer, epoch,
              args.lr)  # train for one epoch
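# The finetune block above copies matching entries between state dicts by hand.
# The same filtered load can be written as one comprehension; this sketch also
# guards against shape mismatches, which the hand-written loop does not:
def load_matching_weights(model, pretrained_model):
    src = pretrained_model.state_dict()
    dst = model.state_dict()
    matched = {k: v for k, v in src.items()
               if k in dst and v.shape == dst[k].shape}
    dst.update(matched)          # overwrite only the layers that line up
    model.load_state_dict(dst)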
def main():
    global args, best_result, output_directory

    # if more than one GPU is available, train on all of them
    if torch.cuda.device_count() > 1:
        args.batch_size = args.batch_size * torch.cuda.device_count()
        train_loader = NYUDepth_loader(args.data_path, batch_size=args.batch_size, isTrain=True)
        val_loader = NYUDepth_loader(args.data_path, batch_size=args.batch_size, isTrain=False)
    else:
        train_loader = NYUDepth_loader(args.data_path, batch_size=args.batch_size, isTrain=True)
        val_loader = NYUDepth_loader(args.data_path, isTrain=False)

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        # args = checkpoint['args']
        # print('restored args:', args)
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        if torch.cuda.device_count() > 1:
            # checkpoints trained with DataParallel need the .module prefix stripped
            model_dict = checkpoint['model'].module.state_dict()
        else:
            model_dict = checkpoint['model'].state_dict()
        model = DORN_nyu.DORN()
        model.load_state_dict(model_dict)
        del model_dict  # free the loaded state dict
        # optimize with SGD
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    else:
        print("=> creating Model")
        model = DORN_nyu.DORN()
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
        start_epoch = 0

    # wrap in DataParallel whenever a GPU is available
    if torch.cuda.device_count():
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
        model = model.cuda()

    # define the loss function
    criterion = criteria.ordLoss()

    # create the results directory
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')

    log_path = os.path.join(output_directory, 'logs',
                            datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    for epoch in range(start_epoch, args.epochs):
        # lr = utils.adjust_learning_rate(optimizer, args.lr, epoch)  # update the learning rate
        train(train_loader, model, criterion, optimizer, epoch, logger)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch, logger)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nrmse={:.3f}\nrml={:.3f}\nlog10={:.3f}\nd1={:.3f}\n"
                    "d2={:.3f}\nd3={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.rmse, result.absrel, result.lg10, result.delta1,
                        result.delta2, result.delta3, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # save a checkpoint every epoch
        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)

    logger.close()
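# criteria.ordLoss above is DORN's ordinal regression loss, which depends on
# discretizing continuous depth into ordinal bins. A sketch of the
# spacing-increasing discretization (SID) thresholds from the DORN paper, where
# [alpha, beta] is the depth range and K the number of bins (the example values
# below are illustrative, not taken from this repo):
import numpy as np

def sid_thresholds(alpha, beta, K):
    i = np.arange(K + 1)
    # t_i = exp(log(alpha) + i * log(beta / alpha) / K)
    return np.exp(np.log(alpha) + i * np.log(beta / alpha) / K)

# e.g. sid_thresholds(0.7, 10.0, 68) yields 69 bin edges over an indoor-style range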
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            f"[Error] Can't find the specified checkpoint at '{args.evaluate}'"
        print(f"[Info] loading the model '{args.evaluate}'")
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        print(args)
        train_loader, val_loader = create_data_loaders(args)
        model_weights = checkpoint['model_state_dict']
        # Create model
        if args.arch == "resnet18_multistage_uncertainty" or \
                args.arch == "resnet18_multistage_uncertainty_fixs":
            model, loss_weights = create_model(args, output_size=train_loader.dataset.output_size)
        else:
            model = create_model(args, output_size=train_loader.dataset.output_size)
            loss_weights = None
        model.load_state_dict(model_weights, strict=False)
        model = model.cuda()
        print(f"[Info] Loaded best model (epoch {checkpoint['epoch']})")
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return
    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            f"[Info] No checkpoint found at '{chkpt_path}'"
        print(f"=> loading checkpoint '{chkpt_path}'")
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        print(args)
        start_epoch = checkpoint['epoch'] + 1
        try:
            best_result = checkpoint['best_result']
        except KeyError:
            best_result.set_to_worst()

        # Create dataloader first
        args.validation = True
        args.workers = 8
        if (args.data == "nuscenes") and (args.modality == "rgbd") and (args.sparsifier == "uar"):
            args.sparsifier = None
        if args.validation:
            train_loader, val_loader = create_data_loaders(args)
        else:
            train_loader = create_data_loaders(args)

        # Load from the model's state dict instead of a pickled model object
        model_weights = checkpoint['model_state_dict']
        # Create model
        if args.arch == "resnet18_multistage_uncertainty" or \
                args.arch == "resnet18_multistage_uncertainty_fixs":
            model, loss_weights = create_model(args, output_size=train_loader.dataset.output_size)
        else:
            model = create_model(args, output_size=train_loader.dataset.output_size)
            loss_weights = None
        model.load_state_dict(model_weights, strict=False)
        model = model.cuda()

        # Create optimizer
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        args.resume = True
    # Create new model
    else:
        print(args)
        # Create dataloader
        if args.validation:
            train_loader, val_loader = create_data_loaders(args)
        else:
            train_loader = create_data_loaders(args)
        # Create model
        if args.arch == "resnet18_multistage_uncertainty" or \
                args.arch == "resnet18_multistage_uncertainty_fixs":
            model, loss_weights = create_model(args, output_size=train_loader.dataset.output_size)
        else:
            model = create_model(args, output_size=train_loader.dataset.output_size)
            loss_weights = None
        # Create optimizer
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        model = model.cuda()

    # Define loss function (criterion) and optimizer
    criterion = {}
    if args.criterion == 'l2':
        criterion["depth"] = MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion["depth"] = MaskedL1Loss().cuda()
    else:
        raise ValueError("[Error] Unknown criterion...")
    # Add the smoothness loss to the criterion
    if args.arch == "resnet18_multistage_uncertainty" or \
            args.arch == "resnet18_multistage_uncertainty_fixs":
        criterion["smooth"] = SmoothnessLoss().cuda()

    # Create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # Create new csv files with only the header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # Create summary writer
    log_path = os.path.join(output_directory, "logs")
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    # Main training loop
    for epoch in range(start_epoch, args.epochs):
        # Adjust the learning rate
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        # Record the learning rate summary
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
            logger.add_scalar('Lr/lr_' + str(i), old_lr, epoch)

        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, loss_weights, logger=logger)

        # Perform evaluation
        if args.validation:
            result, img_merge = validate(val_loader, model, epoch, logger=logger)
            is_best = result.rmse < best_result.rmse
            if is_best:
                best_result = result
                with open(best_txt, 'w') as txtfile:
                    txtfile.write(
                        "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                        "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                            epoch, result.mse, result.rmse, result.absrel, result.lg10,
                            result.mae, result.delta1, result.gpu_time))
                if img_merge is not None:
                    img_filename = output_directory + '/comparison_best.png'
                    utils.save_image(img_merge, img_filename)

        # Save different things depending on the mode
        if args.validation:
            utils.save_checkpoint({
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'best_result': best_result,
                'optimizer_state_dict': optimizer.state_dict(),
            }, is_best, epoch, output_directory)
        else:
            utils.save_checkpoint({
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, False, epoch, output_directory)
def main():
    global args, best_result, output_directory, train_csv, test_csv
    print(args)
    start = 0

    # evaluation mode
    if args.evaluate:
        datasets = configuration_file.datasets_path
        valdir = os.path.join(datasets, args.data, 'val')
        print("Validation directory ", valdir)
        if args.data == 'nyudepthv2':
            from dataloaders.nyu import NYUDataset
            val_dataset = NYUDataset(valdir, split='val', modality=args.modality)
        else:
            raise RuntimeError('Dataset not found.')

        # set batch size to 1 for validation
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
        print("=> validation loaders created.")

        assert os.path.isfile(args.evaluate), \
            "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        validate(val_loader, model, args.start_epoch, write_to_file=False)
        return

    # resume from a particular checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1    # resume from the epoch after the checkpoint
        start = start_epoch
        best_result = checkpoint['best_result']  # load best result
        model = checkpoint['model']              # load model
        optimizer = checkpoint['optimizer']      # load optimizer
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)  # create data loaders
        args.resume = True

    # create a new model if no checkpoint is given
    elif args.train:
        train_loader, val_loader = create_data_loaders(args)  # load train and validation data
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'MobileNet':
            model = models.MobileNetSkipAdd(
                output_size=train_loader.dataset.output_size)
        else:
            # MobileNet is also the default encoder
            model = models.MobileNetSkipAdd(
                output_size=train_loader.dataset.output_size)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)  # configure optimizer

        if configuration_file.GPU:
            if configuration_file.MULTI_GPU:
                # training on multiple GPUs
                model = torch.nn.DataParallel(model).cuda()
            else:
                # training on a single GPU
                model = model.cuda()

    # define loss function
    if args.criterion == 'l2':
        criterion = MaskedMSELoss().cuda() if configuration_file.GPU else MaskedMSELoss()
    elif args.criterion == 'l1':
        criterion = MaskedL1Loss().cuda() if configuration_file.GPU else MaskedL1Loss()

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')  # training results
    test_csv = os.path.join(output_directory, 'test.csv')    # test results
    best_txt = os.path.join(output_directory, 'best.txt')    # best result

    # create new csv files with only a header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # training starts from here
    for epoch in range(start, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)   # evaluate on the validation set

        # remember the best rmse and save the checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                    "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.mse, result.rmse, result.absrel, result.lg10,
                        result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
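# Each variant calls utils.adjust_learning_rate(optimizer, epoch, args.lr) before every
# training epoch, but the helper is never shown. A minimal step-decay sketch is below;
# the decay factor and interval (10x every 5 epochs) are illustrative assumptions, not
# this repository's confirmed schedule.

def adjust_learning_rate(optimizer, epoch, lr_init):
    """Decay the learning rate by 10x every 5 epochs (assumed schedule)."""
    lr = lr_init * (0.1 ** (epoch // 5))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr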
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    # create the result files
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':  # mean squared error
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # the sparsifier generates a random sparse depth input from the ground truth
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')
    if args.data == 'nyudepthv2':
        # import the dataset class only when it is needed
        from dataloaders.nyu_dataloader import NYUDataset
        train_dataset = NYUDataset(traindir, type='train',
                                   modality=args.modality, sparsifier=sparsifier)
        val_dataset = NYUDataset(valdir, type='val',
                                 modality=args.modality, sparsifier=sparsifier)
    elif args.data == 'kitti':
        from dataloaders.kitti_dataloader import KITTIDataset
        train_dataset = KITTIDataset(traindir, type='train',
                                     modality=args.modality, sparsifier=sparsifier)
        val_dataset = KITTIDataset(valdir, type='val',
                                   modality=args.modality, sparsifier=sparsifier)
    else:
        raise RuntimeError('Dataset not found. '
                           'The dataset must be either nyudepthv2 or kitti.')

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        sampler=None,
        worker_init_fn=lambda work_id: np.random.seed(work_id))
    # worker_init_fn ensures a different sampling pattern in each data-loading thread

    # set batch size to 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("=> data loaders created.")

    # evaluation mode: test with the best saved model
    if args.evaluate:
        best_model_filename = os.path.join(output_directory, 'model_best.pth.tar')
        assert os.path.isfile(best_model_filename), \
            "=> no best model found at '{}'".format(best_model_filename)
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

    # create a new model and train it
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        # in_channels is the length of the modality string, e.g. 'rgbd' gives 4 input channels
        in_channels = len(args.modality)
        # only two encoder choices are offered here: ResNet-50 or ResNet-18
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           output_size=train_dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # create new csv files with only a header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()  # for multi-gpu training
    model = model.cuda()
    # print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)   # evaluate on the validation set after every epoch

        # remember the best rmse and save the checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                    "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.mse, result.rmse, result.absrel, result.lg10,
                        result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
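# The sparsifier passed to NYUDataset/KITTIDataset above turns dense ground-truth depth
# into a random sparse input. A minimal sketch of the uniform-sampling idea is below,
# assuming a numpy depth map and a num_samples budget; the real UniformSampling class in
# the dataloaders package may differ in details.

import numpy as np

def uniform_sparse_mask(depth, num_samples, max_depth=np.inf):
    """Return a boolean mask selecting ~num_samples valid depth pixels uniformly at random."""
    valid = (depth > 0) & (depth <= max_depth)  # only sample where ground truth exists
    n_valid = valid.sum()
    if n_valid == 0:
        return np.zeros_like(valid)
    prob = float(num_samples) / n_valid         # expected number of kept pixels = num_samples
    return valid & (np.random.uniform(0, 1, depth.shape) < prob)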
def main():
    global args, best_result, output_directory

    # set random seed
    torch.manual_seed(args.manual_seed)
    torch.cuda.manual_seed(args.manual_seed)
    np.random.seed(args.manual_seed)
    random.seed(args.manual_seed)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use GPU ", torch.cuda.current_device())

    train_loader, val_loader = create_loader(args)

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        start_epoch = 0
        # start_epoch = checkpoint['epoch'] + 1
        # best_result = checkpoint['best_result']
        # optimizer = checkpoint['optimizer']
        # keep only the model from the checkpoint, to solve 'out of memory'
        model = checkpoint['model']
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
        # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
        #                             momentum=args.momentum, weight_decay=args.weight_decay)
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        # clear memory
        del checkpoint
        torch.cuda.empty_cache()
    else:
        print("=> creating Model")
        # input_shape = [args.batch_size, 3, 256, 512]
        model = UNet(3, 1)
        print("=> model created.")
        start_epoch = 0
        print('Number of model parameters: {}'.format(
            sum([p.data.nelement() for p in model.parameters()])))
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
        # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
        #                             momentum=args.momentum, weight_decay=args.weight_decay)

    # DataParallel can be used whether or not multiple GPUs are present
    model = nn.DataParallel(model).cuda()

    # during training, use ReduceLROnPlateau to reduce the learning rate
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=args.lr_patience)

    # loss function
    criterion = criteria.myL1Loss()
    # criterion = nn.SmoothL1Loss()

    # create directory path
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')
    config_txt = os.path.join(output_directory, 'config.txt')

    # write training parameters to the config file
    if not os.path.exists(config_txt):
        with open(config_txt, 'w') as txtfile:
            args_ = vars(args)
            args_str = ''
            for k, v in args_.items():
                args_str = args_str + str(k) + ':' + str(v) + ',\t\n'
            txtfile.write(args_str)

    for epoch in range(start_epoch, args.epochs):
        # track changes of the learning rate
        old_lr = 0.0
        # adjust_learning_rate(optimizer, epoch)
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
        print("lr: %f" % old_lr)

        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)   # evaluate on the validation set

        # remember the best mae and save the checkpoint
        is_best = result.mae < best_result.mae
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write("epoch={}, mae={:.3f}, t_gpu={:.4f}".format(
                    epoch, result.mae, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # save a checkpoint for each epoch
        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)

        # when mae stops falling, reduce the learning rate
        scheduler.step(result.mae)
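# Several variants use a masked L1/L2 criterion so that pixels without ground-truth depth
# (value 0) do not contribute to the loss. A minimal masked-L1 sketch is below; whether
# criteria.myL1Loss masks in exactly this way is an assumption.

import torch.nn as nn

class MaskedL1Loss(nn.Module):
    def forward(self, pred, target):
        # only penalize pixels where ground-truth depth is available
        valid_mask = (target > 0).detach()
        diff = (target - pred)[valid_mask]
        return diff.abs().mean()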
def main():
    global args, best_result, output_directory, train_csv, test_csv

    # Data loading code
    datasets = '/content/drive/MyDrive'
    # valdir = os.path.join(datasets, 'Datasets', args.data, 'val')
    # valdir = '/content/drive/MyDrive/Datasets/Nyudepthv2Previous/nyudepthv2/val/official/'
    valdir = os.path.join(datasets, 'Datasets', 'Nyudepthv2Previous', args.data, 'val')
    if args.data == 'nyudepthv2':
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(valdir, split='val', modality=args.modality)
    elif args.data == 'kitti':
        from dataloaders.kitti import KITTIDataset
        val_dataset = KITTIDataset(valdir, type='val', modality=args.modality)
    else:
        raise RuntimeError('Dataset not found.')

    # set batch size to 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("=> validation loaders created.")

    # evaluation mode
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no model found at '{}'".format(args.evaluate)
        print("=> loading model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        if type(checkpoint) is dict:
            args.start_epoch = checkpoint['epoch']
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        else:
            model = checkpoint
            args.start_epoch = 0
        output_directory = os.path.dirname(args.evaluate)
        validate(val_loader, model, args.start_epoch, write_to_file=False)
        return

    # training mode: resume from a checkpoint
    elif args.resume:
        print("=> resuming from a checkpoint")
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True

    # create a new model
    elif args.train:
        print("=> training a new model")
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'MobileNet':
            # model = models.MobileNetSkipAdd(output_size=train_loader.dataset.output_size)
            model = ResNetSkipAdd(layers=50,
                                  output_size=train_loader.dataset.output_size,
                                  in_channels=in_channels,
                                  pretrained=args.pretrained)
        elif args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        else:
            # by default MobileNet
            model = models.MobileNetSkipAdd(
                output_size=train_loader.dataset.output_size)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    # model = torch.nn.DataParallel(model).cuda()  # for multi-gpu training
    model = model.cuda()

    # define loss function (criterion)
    if args.criterion == 'l2':
        criterion = MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = MaskedL1Loss().cuda()

    # create results folder, if it does not already exist
    print("Arguments ")
    print(args)
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only a header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # training starts from here; check args.resume first, because args was restored from
    # the checkpoint, where args.train may also still be True
    if args.resume:
        start = start_epoch
    elif args.train:
        start = 0

    for epoch in range(start, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)   # evaluate on the validation set

        # remember the best rmse and save the checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\n"
                    "mae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".format(
                        epoch, result.mse, result.rmse, result.absrel, result.lg10,
                        result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
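# The csv.DictWriter calls throughout these scripts rely on a module-level `fieldnames`
# list that never appears in the excerpts. A plausible definition, matching the metrics
# written to best.txt, is sketched below together with how a row would be appended after
# each epoch; the exact field set is an assumption.

import csv

fieldnames = ['mse', 'rmse', 'absrel', 'lg10', 'mae', 'delta1', 'gpu_time']

def append_result_row(csv_path, result):
    """Append one epoch's averaged metrics to train.csv or test.csv."""
    with open(csv_path, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({'mse': result.mse, 'rmse': result.rmse,
                         'absrel': result.absrel, 'lg10': result.lg10,
                         'mae': result.mae, 'delta1': result.delta1,
                         'gpu_time': result.gpu_time})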
def main():
    global args, output_directory, train_csv, test_csvs, mm_scaler  # MinMax scaler

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    start_epoch = 0

    # evaluation mode
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args, mm_scaler)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args, mm_scaler)
        args.resume = True

    # create new model
    else:
        train_loader, val_loader = create_data_loaders(args, mm_scaler)
        print("=> creating Model ({}) ...".format(args.arch))
        from models.rnn_model import Model
        if args.arch == 'LSTM':
            model = Model(input_dim=args.x_dim, hidden_dim=args.hidden_size,
                          Y_target=args.y_target, model_type="lstm")
        elif args.arch == 'GRU':
            model = Model(input_dim=args.x_dim, hidden_dim=args.hidden_size,
                          Y_target=args.y_target, model_type="gru")
        elif args.arch == 'RNN':  # was a plain `if`, which could overwrite the LSTM/GRU model
            model = Model(input_dim=args.x_dim, hidden_dim=args.hidden_size,
                          Y_target=args.y_target, model_type="rnn")
        print("=> model created.")
        model_parameters = list(model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        print("Num. of parameters: ", params)
        optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                     weight_decay=args.weight_decay)

    # model = torch.nn.DataParallel(model).cuda()  # for multi-gpu training
    model = model.cuda()
    criterion = nn.MSELoss().cuda()

    # create results folder, if it does not already exist
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csvs = []
    for i in range(NUM_VAL_CSVS):
        test_csv_name = 'test_' + str(i) + '.csv'
        test_csvs.append(os.path.join(output_directory, test_csv_name))
    test_csv_total = os.path.join(output_directory, 'test.csv')
    test_csvs.append(test_csv_total)  # the extra entry aggregates all validation sets
    assert NUM_VAL_CSVS + 1 == len(test_csvs), "Something's wrong!"

    # create new csv files with only a header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            # was fieldnames=[], which would write an empty header row
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        for test_csv in test_csvs:
            with open(test_csv, 'w') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

    best_rmse = 1e9
    print("=> Learning start.")
    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr,
                                   args.decay_rate, args.decay_step)
        print("=> On training...")
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch

        if epoch % args.validation_interval == 0:
            print("=> On validating...")
            result_rmse, results_list = validate(val_loader, model, epoch)  # evaluate on the validation sets

            # Save validation results
            print("=> On drawing results...")
            pngname = os.path.join(
                output_directory,
                str(epoch).zfill(2) + "_" + str(round(result_rmse, 5)) + ".png")
            utils.plot_trajectory(pngname, results_list[:-1])

            is_best = best_rmse > result_rmse
            if is_best:
                best_rmse = result_rmse
                best_name = os.path.join(output_directory, "best.csv")
                with open(best_name, 'w', newline='') as csvfile:
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                    writer.writeheader()
                    for result_container in results_list:
                        avg = result_container.result
                        writer.writerow({
                            'rmse': avg.rmse,
                            'mean': avg.mean,
                            'median': avg.median,
                            'var': avg.var,
                            'max': avg.error_max
                        })
                    # the final row records the epoch number in the 'rmse' column
                    writer.writerow({
                        'rmse': epoch,
                        'mean': 0,
                        'median': 0,
                        'var': 0,
                        'max': 0
                    })
                utils.save_output(results_list, epoch, output_directory)

            utils.save_checkpoint({
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'optimizer': optimizer,
                'scaler': mm_scaler
            }, is_best, epoch, output_directory)
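# The RNN variant threads a global mm_scaler through create_data_loaders and stores it in
# every checkpoint, so that normalized predictions can be mapped back to physical units at
# evaluation time. A minimal sketch of that idea with sklearn's MinMaxScaler is below;
# whether this repository uses sklearn or a hand-rolled scaler is an assumption, and
# train_targets is a placeholder for the real training data.

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# fit on the training targets only, then reuse the same transform everywhere
mm_scaler = MinMaxScaler(feature_range=(0, 1))
train_targets = np.random.rand(100, 3)  # placeholder for the real training targets
scaled = mm_scaler.fit_transform(train_targets)

# at evaluation time, the scaler stored in the checkpoint inverts the normalization
preds_scaled = scaled[:5]
preds = mm_scaler.inverse_transform(preds_scaled)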