def main():
    """Entry point: load a finished model checkpoint and run it on the
    'test_completion' split.

    Relies on module-level globals defined elsewhere in the file:
    `args`, `device`, `DepthCompletionNet`, `KittiDepth`, `helper`, `iterate`.
    """
    global args
    checkpoint = None
    #is_eval = False
    is_eval = True  # I added this, used for testing, 2020/02/26
    if args.evaluate:
        # keep a reference to the CLI args; most fields are replaced by the
        # ones stored inside the checkpoint below
        args_new = args
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}' ... ".format(args.evaluate),
                  end='')
            checkpoint = torch.load(args.evaluate, map_location=device)
            # restore the args the model was trained with, but keep the
            # freshly supplied data locations
            args = checkpoint['args']
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            is_eval = True
            print("Completed.")
        else:
            print("No model found at '{}'".format(args.evaluate))
            return
    # NOTE(review): is_eval is forced True above, so if --evaluate was NOT
    # given, `checkpoint` stays None and `checkpoint['epoch']` at the bottom
    # raises TypeError — confirm this main is only ever run with --evaluate.
    print("=> creating model and optimizer ... ", end='')
    model = DepthCompletionNet(args).to(device)
    # only trainable parameters go to the optimizer
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("completed.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")
    model = torch.nn.DataParallel(model)

    # Data loading code
    print("=> creating data loaders ... ")
    val_dataset = KittiDepth('test_completion', args)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False, num_workers=2,
        pin_memory=True)  # set batch size to be 1 for validation
    print("\t==> val_loader size:{}".format(len(val_loader)))

    # create backups and results folder
    logger = helper.logger(args)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        print("=> starting model test ...")
        result, is_best = iterate("test_completion", args, val_loader, model,
                                  None, logger, checkpoint['epoch'])
        return
def main():
    """Entry point for test-set evaluation (and optionally training) of the
    depth-completion network.

    Guards: refuses to run with --partial_train, and requires --test yes.
    Relies on module-level globals defined elsewhere in the file:
    `args`, `device`, `DepthCompletionNet`, `KittiDepth`, `helper`, `iterate`.

    Fixes vs. the previous version:
      * error-message typo "wat not given" -> "was not given"
      * removed a no-op list comprehension whose result was discarded
      * `args_new` was only assigned inside the --evaluate/--resume branches
        but read unconditionally later (`args_new.test`) -> NameError when
        neither flag was given; it is now initialised up front.
    """
    global args
    if args.partial_train == 'yes':  # train on a part of the whole train set
        print(
            "Can't use partial train here. It is used only for test check. Exit..."
        )
        return
    if args.test != "yes":
        print(
            "This main should use only for testing, but test=yes was not given. Exit..."
        )
        return

    print("Evaluating test set with main_test:")
    whole_ts = time.time()
    checkpoint = None
    is_eval = False
    # always keep a reference to the CLI args; fields may be overwritten from
    # a loaded checkpoint, and `args_new.test` is read further down
    args_new = args
    if args.evaluate:  # test a finished model
        if os.path.isfile(args.evaluate):  # path is an existing regular file
            print("=> loading finished model from '{}' ... ".format(
                args.evaluate), end='')  # "end=''" disables the newline
            checkpoint = torch.load(args.evaluate, map_location=device)
            # restore training-time args, keep fresh data/result locations
            args = checkpoint['args']
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            args.save_images = args_new.save_images
            args.result = args_new.result
            is_eval = True
            print("Completed.")
        else:
            print("No model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # resume from a checkpoint
        if os.path.isfile(args.resume):
            print("=> loading checkpoint from '{}' ... ".format(args.resume),
                  end='')
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] + 1
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            print("Completed. Resuming from epoch {}.".format(
                checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer ... ", end='')
    model = DepthCompletionNet(args).to(device)
    # only trainable parameters go to the optimizer
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("completed.")

    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")
    # DataParallel splits each batch across the available GPUs and merges the
    # per-replica outputs before returning
    model = torch.nn.DataParallel(model)

    # data loading code
    print("=> creating data loaders ... ")
    if not is_eval:  # we're not evaluating
        train_dataset = KittiDepth('train', args)  # get the paths for the files
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=None)  # load them
        print("\t==> train_loader size:{}".format(len(train_loader)))
    if args_new.test == "yes":
        # will take the data from the "test" folders
        val_dataset = KittiDepth('test', args)
        is_test = 'yes'
    else:
        val_dataset = KittiDepth('val', args)
        is_test = 'no'
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False, num_workers=2,
        pin_memory=True)  # set batch size to be 1 for validation
    print("\t==> val_loader size:{}".format(len(val_loader)))

    # create backups and results folder
    logger = helper.logger(args, is_test)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")  # logger records sequential data to a log file

    # main code - run the NN
    if is_eval:
        print("=> starting model evaluation ...")
        result, is_best = iterate("val", args, val_loader, model, None,
                                  logger, checkpoint['epoch'])
        return

    print("=> starting model training ...")
    for epoch in range(args.start_epoch, args.epochs):
        print("=> start training epoch {}".format(epoch) +
              "/{}..".format(args.epochs))
        train_ts = time.time()
        iterate("train", args, train_loader, model, optimizer, logger,
                epoch)  # train for one epoch
        result, is_best = iterate("val", args, val_loader, model, None,
                                  logger, epoch)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer': optimizer.state_dict(),
            'args': args,
        }, is_best, epoch, logger.output_directory)
        print("finish training epoch {}, time elapsed {:.2f} hours, \n".format(
            epoch, (time.time() - train_ts) / 3600))
        # delete last checkpoint because we have the best_model and we dont need it
        last_checkpoint = os.path.join(
            logger.output_directory,
            'checkpoint-' + str(epoch) + '.pth.tar')
        os.remove(last_checkpoint)
    print("finished model training, time elapsed {0:.2f} hours, \n".format(
        (time.time() - whole_ts) / 3600))
# NOTE(review): fragment — the enclosing function/loop header lies outside this
# chunk. `model_is`, `used_model_is`, `possible_model_is`, `M`, `model_id` and
# `eval_loader` are defined earlier in the original file; do not edit in
# isolation.
model_is.sort()
print(model_is)
# re-draw a random subset of M model indices until a combination not used
# before is found
while model_is in used_model_is:
    random.shuffle(possible_model_is)
    random.shuffle(possible_model_is)
    random.shuffle(possible_model_is)
    model_is = possible_model_is[0:M]
    model_is.sort()
    print(model_is)
used_model_is.append(model_is)

# load one trained network per selected index to form the ensemble
models = []
for i in model_is:
    restore_from = "/root/evaluating_bdl/depthCompletion/trained_models/%s_%d/checkpoint_40000.pth" % (
        model_id, i)
    model = DepthCompletionNet().cuda()
    model = torch.nn.DataParallel(model)
    model.load_state_dict(torch.load(restore_from))
    model.eval()
    models.append(model)

M_float = float(len(models))
print(M_float)

# accumulators for per-batch losses and uncertainty statistics
batch_losses = []
batch_rmses = []
sigma_alea_values = np.array([])
sigma_epi_values = np.array([])
sigma_pred_values = np.array([])
squared_error_values = np.array([])

for i_iter, batch in enumerate(eval_loader):  # NOTE(review): loop body continues past this chunk
# NOTE(review): fragment — the enclosing function header lies outside this
# chunk. `NNs_weights`, `device`, `DepthCompletionNet`, `NN_arguments` and `M`
# are defined earlier in the original file; do not edit in isolation.
models = []
for i, current_NN in enumerate(
        NNs_weights
):  # relevant code from the 'Black-Box' models (to use the NNs for the prediction)
    checkpoint = None
    if os.path.isfile(current_NN):
        print("=> loading checkpoint '{}' ... ".format(current_NN), end='')
        checkpoint = torch.load(current_NN, map_location=device)
        # use the args stored in the checkpoint to rebuild the network
        args = checkpoint['args']
        is_eval = True
        print("Completed.")
    else:
        assert False, ("No model found at '{}'".format(current_NN))
    model = DepthCompletionNet(args).to(device)
    # NOTE(review): this list is built but not used in the visible code
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    model.load_state_dict(checkpoint['model'])
    model = torch.nn.DataParallel(model)
    NN_arguments.append(args)
    models.append(model)

# predict & create samples for the training sets
# if len(existing_weights) == 0:  # not only inferencing
for set_num in range(1, M + 1):
    print("\nSTART PREDICTING train set num: {} for next phase\n".
          format(set_num))
    pred_samp_train_set_time = time.time()  # NOTE(review): loop body continues past this chunk
def main():
    """Entry point: train the depth-completion network on KITTI, or evaluate a
    finished checkpoint when --evaluate is given; --resume continues a
    previous training run.

    Relies on module-level globals defined elsewhere in the file:
    `args`, `device`, `DepthCompletionNet`, `KittiDepth`, `helper`, `iterate`.
    """
    global args
    checkpoint = None
    is_eval = False
    if args.evaluate:
        # keep a reference to the CLI args; most fields are replaced by the
        # ones stored inside the checkpoint below
        args_new = args
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}' ... ".format(args.evaluate),
                  end='')
            checkpoint = torch.load(args.evaluate, map_location=device)
            args = checkpoint['args']
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            args.result = args_new.result
            is_eval = True
            print("Completed.")
        else:
            print("No model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # optionally resume from a checkpoint
        args_new = args
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}' ... ".format(args.resume),
                  end='')
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] + 1
            args.data_folder = args_new.data_folder
            args.val = args_new.val
            args.result = args_new.result
            print("Completed. Resuming from epoch {}.".format(
                checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer ... ", end='')
    model = DepthCompletionNet(args).to(device)
    # only trainable parameters go to the optimizer
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("completed.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")
    model = torch.nn.DataParallel(model)

    # Data loading code
    print("=> creating data loaders ... ")
    if not is_eval:
        train_dataset = KittiDepth('train', args)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=None)
        print("\t==> train_loader size:{}".format(len(train_loader)))
    val_dataset = KittiDepth('val', args)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False, num_workers=2,
        pin_memory=True)  # set batch size to be 1 for validation
    print("\t==> val_loader size:{}".format(len(val_loader)))

    # create backups and results folder
    logger = helper.logger(args)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        print("=> starting model evaluation ...")
        result, is_best = iterate("val", args, val_loader, model, None,
                                  logger, checkpoint['epoch'])
        return

    # main loop
    print("=> starting main loop ...")
    for epoch in range(args.start_epoch, args.epochs):
        print("=> starting training epoch {} ..".format(epoch))
        iterate("train", args, train_loader, model, optimizer, logger,
                epoch)  # train for one epoch
        result, is_best = iterate("val", args, val_loader, model, None,
                                  logger, epoch)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer' : optimizer.state_dict(),
            'args' : args,
        }, is_best, epoch, logger.output_directory)
# NOTE(review): fragment — this chunk begins mid-statement; the call being
# closed on the first line (`train_dataset = ...Augmentation(...`) starts
# before this chunk. `batch_size`, `val_batch_size`, `kitti_depth_path`,
# `learning_rate`, `weight_decay` and the Dataset/criterion classes are
# defined earlier in the original file; do not edit in isolation.
                                            crop_size=(352, 352))
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)
val_dataset = DatasetKITTIVal(kitti_depth_path=kitti_depth_path)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=val_batch_size,
                                         shuffle=False,
                                         num_workers=1)

# Gaussian negative-log-likelihood loss (masked) and RMSE metric
criterion = MaskedL2Gauss().cuda()
rmse_criterion = RMSE().cuda()

model = DepthCompletionNet().cuda()
model = torch.nn.DataParallel(model)
model.train()

optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)
optimizer.zero_grad()

# per-epoch / per-batch bookkeeping
train_losses = []
batch_train_losses = []
val_losses = []
train_rmses = []
batch_train_rmses = []
val_rmses = []

for i_iter, batch in enumerate(train_loader):  # NOTE(review): loop body continues past this chunk
def main():
    """Entry point: train or evaluate the depth-completion network using the
    dataloader factory `get_kitti_dataloader` (supports multiple datasets).

    Relies on module-level globals defined elsewhere in the file:
    `args`, `DepthCompletionNet`, `get_kitti_dataloader`, `dataset_name`,
    `helper`, `iterate`.
    """
    global args
    checkpoint = None
    is_eval = False
    if args.evaluate:
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}'".format(args.evaluate))
            checkpoint = torch.load(args.evaluate)
            # restore the args the model was trained with
            args = checkpoint['args']
            is_eval = True
            print("=> checkpoint loaded.")
        else:
            print("=> no model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # optionally resume from a checkpoint
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer...")
    model = DepthCompletionNet(args).cuda()
    # only trainable parameters go to the optimizer
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("=> model and optimizer created.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")
    model = torch.nn.DataParallel(model)
    print("=> model transferred to multi-GPU.")

    # Data loading code
    print("=> creating data loaders ...")
    if not is_eval:
        train_dataset, train_loader = get_kitti_dataloader(
            mode='train', dataset_name=dataset_name, setname='train',
            args=args)
        # train_dataset = KittiDepth('train', args)
        # train_loader = torch.utils.data.DataLoader(
        #     train_dataset, batch_size=args.batch_size, shuffle=True,
        #     num_workers=args.workers, pin_memory=True, sampler=None)
    val_dataset, val_loader = get_kitti_dataloader(mode='eval',
                                                   dataset_name=dataset_name,
                                                   setname='test',
                                                   args=args)
    # change dataset here:
    # val_dataset = KittiDepth('val', args)
    # val_dataset = KittiDataset(base_dir="./data/kitti/", setname="selval")
    # val_dataset = vKittiDataset(base_dir="./data/vkitti/", setname="test")
    # val_dataset = OurDataset(base_dir="/home/bird/data2/dataset/our_lidar/20190315/f_c_1216_352", setname="f_c_1216_352")
    # val_dataset = OurDataset(base_dir="/home/bird/data2/dataset/our_lidar/20190318/f_c_1216_352", setname="f_c_1216_352_20190318")
    # val_dataset = NuScenesDataset(base_dir="/home/bird/data2/dataset/nuscenes/projected", setname="f_c_1216_352")
    # val_loader = torch.utils.data.DataLoader(val_dataset,
    #     batch_size=1, shuffle=False, num_workers=2, pin_memory=True)  # set batch size to be 1 for validation
    print("=> data loaders created.")

    # create backups and results folder
    logger = helper.logger(args)
    # NOTE(review): best-result restore is disabled here, unlike the sibling
    # mains — confirm this is intentional.
    # if checkpoint is not None:
    #     logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        result, is_best = iterate("eval", args, val_loader, model, None,
                                  logger, checkpoint['epoch'], val_dataset)
        print(result)
        print(is_best)
        return

    # main loop
    for epoch in range(args.start_epoch, args.epochs):
        print("=> starting training epoch {} ..".format(epoch))
        iterate("train", args, train_loader, model, optimizer, logger, epoch,
                train_dataset)  # train for one epoch
        result, is_best = iterate("val", args, val_loader, model, None,
                                  logger, epoch,
                                  val_dataset)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer' : optimizer.state_dict(),
            'args' : args,
        }, is_best, epoch, logger.output_directory)
# NOTE(review): fragment — the enclosing function/loop header lies outside this
# chunk. `snapshot_dir_base`, `i`, `virtualkitti_path`, `num_steps`,
# `batch_size`, `val_batch_size`, `weight_decay` and the Dataset/criterion
# classes are defined earlier in the original file; do not edit in isolation.
learning_rate = 1.0e-5
snapshot_dir = snapshot_dir_base + "_%d/" % i
if not os.path.exists(snapshot_dir):
    os.makedirs(snapshot_dir)

train_dataset = DatasetVirtualKITTIAugmentation(virtualkitti_path=virtualkitti_path,
                                                max_iters=num_steps*batch_size,
                                                crop_size=(352, 352))
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)
val_dataset = DatasetVirtualKITTIVal(virtualkitti_path=virtualkitti_path)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=val_batch_size,
                                         shuffle=False,
                                         num_workers=1)

# Gaussian negative-log-likelihood loss (masked) and RMSE metric
criterion = MaskedL2Gauss().cuda()
rmse_criterion = RMSE().cuda()

model = DepthCompletionNet().cuda()
model = torch.nn.DataParallel(model)
model.train()

optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)
optimizer.zero_grad()

# per-epoch / per-batch bookkeeping
train_losses = []
batch_train_losses = []
val_losses = []
train_rmses = []
batch_train_rmses = []
val_rmses = []

for i_iter, batch in enumerate(train_loader):
    imgs, sparses, targets, file_ids = batch
    imgs = Variable(imgs.cuda())  # (shape: (batch_size, h, w))  # NOTE(review): loop body continues past this chunk
def main():
    """Entry point: train or evaluate a depth-completion network that predicts
    both depth and intensity; validation metrics for each are logged to
    TensorBoard.

    Relies on module-level globals defined elsewhere in the file:
    `args`, `DepthCompletionNet`, `KittiDepth`, `helper`, `iterate`.
    """
    global args
    checkpoint = None
    is_eval = False
    if args.evaluate:
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}'".format(args.evaluate))
            checkpoint = torch.load(args.evaluate)
            # restore the args the model was trained with
            args = checkpoint['args']
            is_eval = True
            print("=> checkpoint loaded.")
        else:
            print("=> no model found at '{}'".format(args.evaluate))
            return
    elif args.resume:  # optionally resume from a checkpoint
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer...")
    model = DepthCompletionNet(args).cuda()
    # only trainable parameters go to the optimizer
    model_named_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(model_named_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print("=> model and optimizer created.")
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> checkpoint state loaded.")
    model = torch.nn.DataParallel(model)
    print("=> model transferred to multi-GPU.")

    # Data loading code
    print("=> creating data loaders ...")
    if not is_eval:
        train_dataset = KittiDepth('train', args)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=None)
    val_dataset = KittiDepth('val', args)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False, num_workers=2,
        pin_memory=False)  # set batch size to be 1 for validation
    print("=> data loaders created.")

    # create backups and results folder
    logger = helper.logger(args)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
    print("=> logger created.")

    if is_eval:
        result, result_intensity, is_best = iterate("val", args, val_loader,
                                                    model, None, logger,
                                                    checkpoint['epoch'])
        return

    # main loop
    for epoch in range(args.start_epoch, args.epochs):
        print("=> starting training epoch {} ..".format(epoch))
        iterate("train", args, train_loader, model, optimizer, logger,
                epoch)  # train for one epoch
        result, result_intensity, is_best = iterate(
            "val", args, val_loader, model, None, logger,
            epoch)  # evaluate on validation set
        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer' : optimizer.state_dict(),
            'args' : args,
        }, is_best, epoch, logger.output_directory)
        # log depth and intensity validation metrics to TensorBoard
        logger.writer.add_scalar('eval/rmse_depth', result.rmse, epoch)
        logger.writer.add_scalar('eval/rmse_intensity', result_intensity.rmse,
                                 epoch)
        logger.writer.add_scalar('eval/mae_depth', result.mae, epoch)
        logger.writer.add_scalar('eval/mae_intensity', result_intensity.mae,
                                 epoch)
        # logger.writer.add_scalar('eval/irmse_depth', result.irmse, epoch)
        # logger.writer.add_scalar('eval/irmse_intensity', result_intensity.irmse, epoch)
        # combined metric: depth RMSE plus weighted intensity RMSE
        logger.writer.add_scalar('eval/rmse_total',
                                 result.rmse + args.wi * result_intensity.rmse,
                                 epoch)