def train(epoch, model, optimizer, batch):
    writer = SummaryWriter()
    model.train()
    mydataset = MyDataset('/notebooks/EuRoC_modify/', 'V1_01_easy')
    #criterion = nn.MSELoss()
    criterion = nn.L1Loss(size_average=False)

    start = 5
    end = len(mydataset) - batch
    batch_num = (end - start)  #/ batch
    startT = time.time()

    with tools.TimerBlock("Start training") as block:
        for k in range(epoch):
            for i in range(start, end):  #len(mydataset)-1):
                data, data_imu, target, target2 = mydataset.load_img_bat(i, batch)
                data, data_imu, target, target2 = \
                    data.cuda(), data_imu.cuda(), target.cuda(), target2.cuda()

                optimizer.zero_grad()
                output = model(data, data_imu)
                loss = criterion(output, target) + criterion(output, target2)
                loss.backward()
                optimizer.step()

                avgTime = block.avg()
                remainingTime = int((batch_num * epoch - (i + batch_num * k)) * avgTime)
                rTime_str = "{:02d}:{:02d}:{:02d}".format(int(remainingTime / 60 // 60),
                                                          int(remainingTime // 60 % 60),
                                                          int(remainingTime % 60))

                block.log('Train Epoch: {}\t[{}/{} ({:.0f}%)]\tLoss: {:.6f}, TimeAvg: {:.4f}, Remaining: {}'
                          .format(k, i, batch_num,
                                  100. * (i + batch_num * k) / (batch_num * epoch),
                                  loss.data[0], avgTime, rTime_str))

                writer.add_scalar('loss', loss.data[0], k * batch_num + i)

            check_str = 'checkpoint_{}.pt'.format(k)
            torch.save(model.state_dict(), check_str)

    #torch.save(model, 'vinet_v1_01.pt')
    #model.save_state_dict('vinet_v1_01.pt')
    torch.save(model.state_dict(), 'vinet_v1_01.pt')
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
def train(epoch, model, optimizer, batch):
    model.train()
    mydataset = MyDataset('/notebooks/data/euroc/', 'V1_01_easy')
    criterion = nn.MSELoss()

    start = 5
    end = len(mydataset) - batch
    batch_num = (end - start) // batch
    startT = time.time()

    with tools.TimerBlock("Start training") as block:
        for i in range(start, end, batch):  #len(mydataset)-1):
            data, target = mydataset.load_img_bat(i, batch)
            data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(data)
            #print('output', output.shape)
            #print('target', target.shape)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            if i % 1 == 0:
                avgTime = block.avg()
                remainingTime = int((batch_num - (i / batch)) * avgTime)
                rTime_str = "{:02d}:{:02d}:{:02d}".format(int(remainingTime / 60 // 60),
                                                          int(remainingTime // 60 % 60),
                                                          int(remainingTime % 60))
                #rTime_str = str(remainingTime/60//60) + ':' + str(remainingTime//60%60) + ':' + str(remainingTime%60)
                #print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Time remaining: {:.4f}'.format(
                #    i/batch, i/batch, batch_num,
                #    100. * i/batch / batch_num, loss.data[0], avgTime))
                block.log('Train Epoch: {}\t[{}/{} ({:.0f}%)]\tLoss: {:.6f}, TimeAvg: {:.4f}, Remaining: {}'
                          .format(i / batch, i / batch, batch_num,
                                  100. * i / batch / batch_num,
                                  loss.data[0], avgTime, rTime_str))

            if i % 500 == 0 and i != 0:
                check_str = 'checkpoint_{}.pt'.format(i / batch)
                #torch.save(model, check_str)
                #model.save_state_dict(check_str)
                torch.save(model.state_dict(), check_str)

    #torch.save(model, 'vinet_v1_01.pt')
    #model.save_state_dict('vinet_v1_01.pt')
    torch.save(model.state_dict(), 'vinet_v1_01.pt')
tools.add_arguments_for_module(parser, datasets,
                               argument_for_class='inference_dataset',
                               default='MpiSintelClean',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                   'replicates': 1})

main_dir = os.path.dirname(os.path.realpath(__file__))
os.chdir(main_dir)

# Parse the official arguments
with tools.TimerBlock("Parsing Arguments") as block:
    args = parser.parse_args()

    # Get argument defaults (hastag #thisisahack)
    parser.add_argument('--IGNORE', action='store_true')
    defaults = vars(parser.parse_args(['--IGNORE']))

    # Print all arguments, color the non-defaults
    for argument, value in sorted(vars(args).items()):
        reset = colorama.Style.RESET_ALL
        color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
        block.log('{}{}: {}{}'.format(color, argument, value, reset))

    args.model_class = tools.module_to_dict(models)[args.model]
    args.optimizer_class = tools.module_to_dict(torch.optim)[args.optimizer]
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1,
                    help="Log every n batches")

tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')

tools.add_arguments_for_module(parser, datasets,
                               argument_for_class='inference_dataset',
                               default='Google',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './Google/train',
                                                   'replicates': 1})

main_dir = os.path.dirname(os.path.realpath(__file__))
os.chdir(main_dir)

# Parse the official arguments
with tools.TimerBlock("Parsing Arguments") as block:
    args = parser.parse_args()
    if args.number_gpus < 0:
        args.number_gpus = torch.cuda.device_count()

    # Get argument defaults (hastag #thisisahack)
    parser.add_argument('--IGNORE', action='store_true')
    defaults = vars(parser.parse_args(['--IGNORE']))

    # Print all arguments, color the non-defaults
    for argument, value in sorted(vars(args).items()):
        reset = colorama.Style.RESET_ALL
        color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
        block.log('{}{}: {}{}'.format(color, argument, value, reset))

    args.model_class = tools.module_to_dict(models)[args.model]
def train():
    epoch = 10
    batch = 1

    model = Vinet()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    #optimizer = optim.Adam(model.parameters(), lr = 0.001)

    writer = SummaryWriter()
    model.train()

    mydataset = MyDataset('/notebooks/EuRoC_modify/', 'V1_01_easy')
    #criterion = nn.MSELoss()
    criterion = nn.L1Loss(size_average=False)

    start = 5
    end = len(mydataset) - batch
    batch_num = (end - start)  #/ batch
    startT = time.time()
    abs_traj = None

    with tools.TimerBlock("Start training") as block:
        for k in range(epoch):
            for i in range(start, end):  #len(mydataset)-1):
                data, data_imu, target_f2f, target_global = mydataset.load_img_bat(i, batch)
                data, data_imu, target_f2f, target_global = \
                    data.cuda(), data_imu.cuda(), target_f2f.cuda(), target_global.cuda()

                optimizer.zero_grad()

                if i == start:
                    ## load first SE3 pose xyzQuaternion
                    abs_traj = mydataset.getTrajectoryAbs(start)
                    abs_traj_input = np.expand_dims(abs_traj, axis=0)
                    abs_traj_input = np.expand_dims(abs_traj_input, axis=0)
                    abs_traj_input = Variable(
                        torch.from_numpy(abs_traj_input).type(torch.FloatTensor).cuda())

                ## Forward
                output = model(data, data_imu, abs_traj_input)

                ## Accumulate pose
                numarr = output.data.cpu().numpy()
                abs_traj = se3qua.accu(abs_traj, numarr)
                abs_traj_input = np.expand_dims(abs_traj, axis=0)
                abs_traj_input = np.expand_dims(abs_traj_input, axis=0)
                abs_traj_input = Variable(
                    torch.from_numpy(abs_traj_input).type(torch.FloatTensor).cuda())

                ## (F2F loss) + (Global pose loss)
                ## Global pose: Full concatenated pose relative to the start of the sequence
                loss = criterion(output, target_f2f) + criterion(abs_traj_input, target_global)
                loss.backward()
                optimizer.step()

                avgTime = block.avg()
                remainingTime = int((batch_num * epoch - (i + batch_num * k)) * avgTime)
                rTime_str = "{:02d}:{:02d}:{:02d}".format(int(remainingTime / 60 // 60),
                                                          int(remainingTime // 60 % 60),
                                                          int(remainingTime % 60))

                block.log('Train Epoch: {}\t[{}/{} ({:.0f}%)]\tLoss: {:.6f}, TimeAvg: {:.4f}, Remaining: {}'
                          .format(k, i, batch_num,
                                  100. * (i + batch_num * k) / (batch_num * epoch),
                                  loss.data[0], avgTime, rTime_str))

                writer.add_scalar('loss', loss.data[0], k * batch_num + i)

            check_str = 'checkpoint_{}.pt'.format(k)
            torch.save(model.state_dict(), check_str)

    #torch.save(model, 'vinet_v1_01.pt')
    #model.save_state_dict('vinet_v1_01.pt')
    torch.save(model.state_dict(), 'vinet_v1_01.pt')
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
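# Hedged reference sketch (not from the original sources). The loop above accumulates
# the network's frame-to-frame output onto an absolute pose via se3qua.accu(abs_traj, numarr),
# but se3qua itself is not shown. The helper below is only an assumption of what such an
# accumulation could look like when the absolute pose is stored as xyz + unit quaternion
# [x, y, z, qw, qx, qy, qz] and the relative motion is already in the same 7-dim form
# (the real se3qua presumably maps the 6-dof se(3) output through an exponential map first).
# The names quat_mul, quat_rotate and accumulate_pose are hypothetical.
import numpy as np

def quat_mul(q1, q2):
    # Hamilton product of two (w, x, y, z) quaternions.
    w1, x1, y1, z1 = q1
    w2, x2, y2, z2 = q2
    return np.array([w1*w2 - x1*x2 - y1*y2 - z1*z2,
                     w1*x2 + x1*w2 + y1*z2 - z1*y2,
                     w1*y2 - x1*z2 + y1*w2 + z1*x2,
                     w1*z2 + x1*y2 - y1*x2 + z1*w2])

def quat_rotate(q, v):
    # Rotate vector v by unit quaternion q: v' = q * (0, v) * q_conjugate.
    qv = np.concatenate(([0.0], v))
    q_conj = q * np.array([1.0, -1.0, -1.0, -1.0])
    return quat_mul(quat_mul(q, qv), q_conj)[1:]

def accumulate_pose(abs_pose, rel_pose):
    # abs_pose, rel_pose: [x, y, z, qw, qx, qy, qz]
    t, q = abs_pose[:3], abs_pose[3:]
    dt, dq = rel_pose[:3], rel_pose[3:]
    t_new = t + quat_rotate(q, dt)           # apply the body-frame translation
    q_new = quat_mul(q, dq)
    q_new = q_new / np.linalg.norm(q_new)    # re-normalize to stay a unit quaternion
    return np.concatenate([t_new, q_new])

# Example: starting at the origin with identity rotation, a relative step of 0.1 m
# along x yields accumulate_pose([0,0,0,1,0,0,0], [0.1,0,0,1,0,0,0]) == [0.1,0,0,1,0,0,0].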
def train():
    epoch = 5
    batch = 4
    timesteps = 4  # 2
    imu_seq_len = 3

    #mydataset = MyDataset('/notebooks/EuRoC_modify/', 'V1_01_easy', batch, timesteps, imu_seq_len)
    mydataset = MyDataset('/notebooks/data/Total_Data/', '00', batch, timesteps, imu_seq_len)

    start = 5
    end = len(mydataset) - (timesteps * batch)
    step = timesteps - 1

    model = Vinet()
    model.train()

    #optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    #optimizer = optim.Adam(model.parameters(), lr = 0.001, weight_decay=0.1)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.1)

    #criterion = nn.MSELoss(size_average=False)
    criterion = nn.MSELoss()

    writer = SummaryWriter()
    startT = time.time()
    init_SE3 = None
    total_i = 0

    with tools.TimerBlock("Start training") as block:
        for k in range(1, epoch + 1):
            for i in range(start, end, step):
                #print(k, " ", i, " -> ", i+timesteps-1, end=" ")
                img, imu, init_SE3, target_f2f, target_global = mydataset.load_img_bat(i)
                img, imu, init_SE3, target_f2f, target_global = \
                    img.cuda(), imu.cuda(), init_SE3.cuda(), target_f2f.cuda(), target_global.cuda()

                optimizer.zero_grad()

                ## LSTM part Forward
                se3, composed_SE3 = model(img, imu, init_SE3)  # (batch, 6)

                ## (F2F loss) + (Global pose loss)
                ## Global pose: Full concatenated pose relative to the start of the sequence
                ## (batch, timesteps, 6, 1) // (batch, timesteps, 7, 1)
                #loss_se3 =
                #lossSE3 =
                loss = criterion(se3.cpu(), target_f2f.cpu()) + \
                       criterion(composed_SE3.cpu(), target_global.cpu())
                loss.backward()
                optimizer.step()

                #avgTime = block.avg()
                #remainingTime = int((batch_num*epoch - (i + batch_num*k)) * avgTime)
                #remainingTime = int((epoch*len(mydataset)*avgTime) - (k*total_i*avgTime))
                #rTime_str = "{:02d}:{:02d}:{:02d}".format(int(remainingTime/60//60),
                #                                          int(remainingTime//60%60),
                #                                          int(remainingTime%60))
                #block.log('Train Epoch: {} iter: {}/{} \t Loss: {:.6f}, TimeAvg: {:.4f}, Remaining: {}'.format(
                #    k, start+i, end, loss.data[0], avgTime, rTime_str))

                now = time.localtime()
                print("Train Epoch: {} iter: {}/{} \t Loss: {:.6f}".format(
                    k, start + i, end, loss.data[0]), end=" ")
                print("\t Time: %02d-%02d %02d:%02d:%02d" %
                      (now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec))

                if (i % 100 == 0):
                    check_str = "checkpoint_%02d.pt" % (k)
                    torch.save(model.state_dict(), check_str)

            total_i = total_i + len(mydataset) - 1 - start

    #torch.save(model, 'vinet_v1_01.pt')
    #model.save_state_dict('vinet_v1_01.pt')
    #torch.save(model.state_dict(), 'vinet_v1_01.pt')
    #writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
def load_model(self):
    self.gpuargs = {'num_workers': self.args.number_workers,
                    'pin_memory': True} if self.args.cuda else {}

    with tools.TimerBlock("Building {} model".format(self.args.model)) as block:

        class ModelAndLoss(nn.Module):
            def __init__(self, args):
                super(ModelAndLoss, self).__init__()
                kwargs = tools.kwargs_from_args(args, 'model')
                self.model = args.model_class(args, **kwargs)
                kwargs = tools.kwargs_from_args(args, 'loss')
                self.loss = args.loss_class(args, **kwargs)

            def forward(self, data, target, inference=False):
                output = self.model(data)
                loss_values = self.loss(output, target)
                if not inference:
                    return loss_values
                else:
                    return loss_values, output

        self.model_and_loss = ModelAndLoss(self.args)

        # block.log('Effective Batch Size: {}'.format(self.args.effective_batch_size))
        block.log('Number of parameters: {}'.format(
            sum([p.data.nelement() if p.requires_grad else 0
                 for p in self.model_and_loss.parameters()])))

        # assign to cuda or wrap with dataparallel, model and loss
        if self.args.cuda and (self.args.number_gpus > 0) and self.args.fp16:
            block.log('Parallelizing')
            model_and_loss = nn.parallel.DataParallel(
                self.model_and_loss, device_ids=list(range(self.args.number_gpus)))

            block.log('Initializing CUDA')
            model_and_loss = model_and_loss.cuda().half()
            torch.cuda.manual_seed(self.args.seed)
            param_copy = [param.clone().type(torch.cuda.FloatTensor).detach()
                          for param in model_and_loss.parameters()]

        elif self.args.cuda and self.args.number_gpus > 0:
            block.log('Initializing CUDA')
            model_and_loss = self.model_and_loss.cuda()
            block.log('Parallelizing')
            model_and_loss = nn.parallel.DataParallel(
                model_and_loss, device_ids=list(range(self.args.number_gpus)))
            torch.cuda.manual_seed(self.args.seed)

        else:
            block.log('CUDA not being used')
            torch.manual_seed(self.args.seed)

        cwd = os.getcwd()
        print(cwd)

        # Load weights if needed, otherwise randomly initialize
        if self.args.resume and os.path.isfile(self.args.resume):
            block.log("Loading checkpoint '{}'".format(self.args.resume))
            checkpoint = torch.load(self.args.resume)
            # if (not args.inference) and (not args.test):
            #     args.start_epoch = checkpoint['epoch']
            #     best_err = checkpoint['best_EPE']
            model_and_loss.module.model.load_state_dict(checkpoint['state_dict'])
            block.log("Loaded checkpoint '{}' (at epoch {})".format(
                self.args.resume, checkpoint['epoch']))

        elif self.args.resume:
            block.log("No checkpoint found at '{}'".format(self.args.resume))
            quit()

        else:
            block.log("Random initialization")

        block.log("Initializing save directory: {}".format(self.args.save))
        if not os.path.exists(self.args.save):
            os.makedirs(self.args.save)

    self.warping_model = FlowWarping()
    print("Warping Model initialized")
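# Hedged reference sketch (not from the original sources). load_model() above ends by
# constructing FlowWarping(), whose definition is not shown here. The module below is only
# an assumption of what a typical flow-warping layer does: backward-warp an image with a
# dense optical-flow field using F.grid_sample. The class name BackwardFlowWarping and its
# call signature are hypothetical; the actual FlowWarping may differ.
import torch
import torch.nn as nn
import torch.nn.functional as F

class BackwardFlowWarping(nn.Module):
    def forward(self, img, flow):
        # img:  (B, C, H, W) image to sample from
        # flow: (B, 2, H, W) flow in pixels; flow[:, 0] = dx, flow[:, 1] = dy
        b, _, h, w = img.shape
        xs = torch.arange(w, device=img.device, dtype=img.dtype).view(1, 1, w).expand(b, h, w)
        ys = torch.arange(h, device=img.device, dtype=img.dtype).view(1, h, 1).expand(b, h, w)
        grid_x = xs + flow[:, 0]                          # source x-coordinate per pixel
        grid_y = ys + flow[:, 1]                          # source y-coordinate per pixel
        # normalize sampling locations to [-1, 1] as grid_sample expects
        grid_x = 2.0 * grid_x / max(w - 1, 1) - 1.0
        grid_y = 2.0 * grid_y / max(h - 1, 1) - 1.0
        grid = torch.stack((grid_x, grid_y), dim=-1)      # (B, H, W, 2)
        return F.grid_sample(img, grid, align_corners=True)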
def initialize_args():
    if __name__ == '__main__':
        parser = argparse.ArgumentParser()

        parser.add_argument('--start_epoch', type=int, default=1)
        parser.add_argument('--total_epochs', type=int, default=10000)
        parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size")
        parser.add_argument('--train_n_batches', type=int, default=-1,
                            help='Number of mini-batches per epoch. If < 0, it will be determined by training_dataloader')
        parser.add_argument('--crop_size', type=int, nargs='+', default=[256, 256],
                            help="Spatial dimension to crop training samples for training")
        parser.add_argument('--gradient_clip', type=float, default=None)
        parser.add_argument('--schedule_lr_frequency', type=int, default=0,
                            help='in number of iterations (0 for no schedule)')
        parser.add_argument('--schedule_lr_fraction', type=float, default=10)
        parser.add_argument("--rgb_max", type=float, default=255.)

        parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8)
        parser.add_argument('--number_gpus', '-ng', type=int, default=-1,
                            help='number of GPUs to use')

        parser.add_argument('--no_cuda', action='store_true')
        parser.add_argument('--seed', type=int, default=1)
        parser.add_argument('--name', default='run', type=str,
                            help='a name to append to the save directory')
        parser.add_argument('--save', '-s', default='./work', type=str,
                            help='directory for saving')

        parser.add_argument('--validation_frequency', type=int, default=5,
                            help='validate every n epochs')
        parser.add_argument('--validation_n_batches', type=int, default=-1)
        parser.add_argument('--render_validation', action='store_true',
                            help='run inference (save flows to file) every validation_frequency epochs')

        parser.add_argument('--inference', action='store_true')
        parser.add_argument('--inference_size', type=int, nargs='+', default=[-1, -1],
                            help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used')
        parser.add_argument('--inference_batch_size', type=int, default=1)
        parser.add_argument('--inference_n_batches', type=int, default=-1)
        parser.add_argument('--save_flow', action='store_true',
                            help='save predicted flows to file')

        parser.add_argument('--resume', default='', type=str, metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1,
                            help="Log every n batches")

        parser.add_argument('--skip_training', action='store_true')
        parser.add_argument('--skip_validation', action='store_true')

        parser.add_argument('--fp16', action='store_true',
                            help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
        parser.add_argument('--fp16_scale', type=float, default=1024.,
                            help='Loss scaling, positive power of 2 values can improve fp16 convergence.')

        tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')

        tools.add_arguments_for_module(parser, losses, argument_for_class='loss', default='L1Loss')

        tools.add_arguments_for_module(parser, torch.optim, argument_for_class='optimizer',
                                       default='Adam', skip_params=['params'])

        tools.add_arguments_for_module(parser, datasets,
                                       argument_for_class='training_dataset',
                                       default='MpiSintelFinal',
                                       skip_params=['is_cropped'],
                                       parameter_defaults={'root': './MPI-Sintel/flow/training'})

        tools.add_arguments_for_module(parser, datasets,
                                       argument_for_class='validation_dataset',
                                       default='MpiSintelClean',
                                       skip_params=['is_cropped'],
                                       parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                           'replicates': 1})

        tools.add_arguments_for_module(parser, datasets,
                                       argument_for_class='inference_dataset',
                                       default='MpiSintelClean',
                                       skip_params=['is_cropped'],
                                       parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                           'replicates': 1})

        main_dir = os.path.dirname(os.path.realpath(__file__))
        os.chdir(main_dir)

        # Parse the official arguments
        with tools.TimerBlock("Parsing Arguments") as block:
            args = parser.parse_args()
            if args.number_gpus < 0:
                args.number_gpus = torch.cuda.device_count()

            # Get argument defaults (hastag #thisisahack)
            parser.add_argument('--IGNORE', action='store_true')
            defaults = vars(parser.parse_args(['--IGNORE']))

            # Print all arguments, color the non-defaults
            for argument, value in sorted(vars(args).items()):
                reset = colorama.Style.RESET_ALL
                color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
                block.log('{}{}: {}{}'.format(color, argument, value, reset))

            args.model_class = tools.module_to_dict(models)[args.model]
            args.optimizer_class = tools.module_to_dict(torch.optim)[args.optimizer]
            args.loss_class = tools.module_to_dict(losses)[args.loss]

            args.training_dataset_class = tools.module_to_dict(datasets)[args.training_dataset]
            args.validation_dataset_class = tools.module_to_dict(datasets)[args.validation_dataset]
            args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset]

            args.cuda = not args.no_cuda and torch.cuda.is_available()
            args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip()
            args.log_file = join(args.save, 'args.txt')

            # dict to collect activation gradients (for training debug purpose)
            args.grads = {}

            if args.inference:
                args.skip_validation = True
                args.skip_training = True
                args.total_epochs = 1
                args.inference_dir = "{}/inference".format(args.save)

        print('Source Code')
        print((' Current Git Hash: {}\n'.format(args.current_hash)))

        # Change the title for `top` and `pkill` commands
        setproctitle.setproctitle(args.save)

        # Dynamically load the dataset class with parameters passed in via
        # "--argument_[param]=[value]" arguments
        with tools.TimerBlock("Initializing Datasets") as block:
            args.effective_batch_size = args.batch_size * args.number_gpus
            args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
            args.effective_number_workers = args.number_workers * args.number_gpus
            gpuargs = {'num_workers': args.effective_number_workers,
                       'pin_memory': True,
                       'drop_last': True} if args.cuda else {}
            inf_gpuargs = gpuargs.copy()
            inf_gpuargs['num_workers'] = args.number_workers

            if exists(args.training_dataset_root):
                train_dataset = args.training_dataset_class(
                    args, True, **tools.kwargs_from_args(args, 'training_dataset'))
                block.log('Training Dataset: {}'.format(args.training_dataset))
                block.log('Training Input: {}'.format(
                    ' '.join([str([d for d in x.size()]) for x in train_dataset[0][0]])))
                block.log('Training Targets: {}'.format(
                    ' '.join([str([d for d in x.size()]) for x in train_dataset[0][1]])))
                train_loader = DataLoader(train_dataset,
                                          batch_size=args.effective_batch_size,
                                          shuffle=True,
                                          **gpuargs)

            if exists(args.validation_dataset_root):
                validation_dataset = args.validation_dataset_class(
                    args, True, **tools.kwargs_from_args(args, 'validation_dataset'))
                block.log('Validation Dataset: {}'.format(args.validation_dataset))
                block.log('Validation Input: {}'.format(
                    ' '.join([str([d for d in x.size()]) for x in validation_dataset[0][0]])))
                block.log('Validation Targets: {}'.format(
                    ' '.join([str([d for d in x.size()]) for x in validation_dataset[0][1]])))
                validation_loader = DataLoader(validation_dataset,
                                               batch_size=args.effective_batch_size,
                                               shuffle=False,
                                               **gpuargs)

            if exists(args.inference_dataset_root):
                inference_dataset = args.inference_dataset_class(
                    args, False, **tools.kwargs_from_args(args, 'inference_dataset'))
                block.log('Inference Dataset: {}'.format(args.inference_dataset))
                block.log('Inference Input: {}'.format(
                    ' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]])))
                block.log('Inference Targets: {}'.format(
                    ' '.join([str([d for d in x.size()]) for x in inference_dataset[0][1]])))
                inference_loader = DataLoader(inference_dataset,
                                              batch_size=args.effective_inference_batch_size,
                                              shuffle=False,
                                              **inf_gpuargs)

        # Dynamically load model and loss class with parameters passed in via
        # "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
        with tools.TimerBlock("Building {} model".format(args.model)) as block:

            class ModelAndLoss(nn.Module):
                def __init__(self, args):
                    super(ModelAndLoss, self).__init__()
                    kwargs = tools.kwargs_from_args(args, 'model')
                    self.model = args.model_class(args, **kwargs)
                    kwargs = tools.kwargs_from_args(args, 'loss')
                    self.loss = args.loss_class(args, **kwargs)

                def forward(self, data, target, inference=False):
                    output = self.model(data)
                    loss_values = self.loss(output, target)
                    if not inference:
                        return loss_values
                    else:
                        return loss_values, output

            model_and_loss = ModelAndLoss(args)

            block.log('Effective Batch Size: {}'.format(args.effective_batch_size))
            block.log('Number of parameters: {}'.format(
                sum([p.data.nelement() if p.requires_grad else 0
                     for p in model_and_loss.parameters()])))

            # assign to cuda or wrap with dataparallel, model and loss
            if args.cuda and (args.number_gpus > 0) and args.fp16:
                block.log('Parallelizing')
                model_and_loss = nn.parallel.DataParallel(
                    model_and_loss, device_ids=list(range(args.number_gpus)))

                block.log('Initializing CUDA')
                model_and_loss = model_and_loss.cuda().half()
                torch.cuda.manual_seed(args.seed)
                param_copy = [param.clone().type(torch.cuda.FloatTensor).detach()
                              for param in model_and_loss.parameters()]

            elif args.cuda and args.number_gpus > 0:
                block.log('Initializing CUDA')
                model_and_loss = model_and_loss.cuda()
                block.log('Parallelizing')
                model_and_loss = nn.parallel.DataParallel(
                    model_and_loss, device_ids=list(range(args.number_gpus)))
                torch.cuda.manual_seed(args.seed)

            else:
                block.log('CUDA not being used')
                torch.manual_seed(args.seed)

            # Load weights if needed, otherwise randomly initialize
            if args.resume and os.path.isfile(args.resume):
                block.log("Loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume)
                if not args.inference:
                    args.start_epoch = checkpoint['epoch']
                    best_err = checkpoint['best_EPE']
                model_and_loss.module.model.load_state_dict(checkpoint['state_dict'])
                block.log("Loaded checkpoint '{}' (at epoch {})".format(
                    args.resume, checkpoint['epoch']))

            elif args.resume and args.inference:
                block.log("No checkpoint found at '{}'".format(args.resume))
                quit()

            else:
                block.log("Random initialization")

            block.log("Initializing save directory: {}".format(args.save))
            if not os.path.exists(args.save):
                os.makedirs(args.save)

            train_logger = SummaryWriter(log_dir=os.path.join(args.save, 'train'),
                                         comment='training')
            validation_logger = SummaryWriter(log_dir=os.path.join(args.save, 'validation'),
                                              comment='validation')

        # Dynamically load the optimizer with parameters passed in via
        # "--optimizer_[param]=[value]" arguments
        with tools.TimerBlock("Initializing {} Optimizer".format(args.optimizer)) as block:
            kwargs = tools.kwargs_from_args(args, 'optimizer')
            if args.fp16:
                optimizer = args.optimizer_class(
                    [p for p in param_copy if p.requires_grad], **kwargs)
            else:
                optimizer = args.optimizer_class(
                    [p for p in model_and_loss.parameters() if p.requires_grad], **kwargs)
            for param, default in list(kwargs.items()):
                block.log("{} = {} ({})".format(param, default, type(default)))

        # Log all arguments to file
        for argument, value in sorted(vars(args).items()):
            block.log2file(args.log_file, '{}: {}'.format(argument, value))

        return args
def infer_flownet(in_path, out_path, reverse, downscale_factor=1):
    args = SimpleNamespace(model="FlowNet2",
                           reverse=reverse,
                           downscale_factor=downscale_factor,
                           start_epoch=1,
                           total_epochs=10000,
                           batch_size=8,
                           train_n_batches=-1,
                           crop_size=[256, 256],
                           gradient_clip=None,
                           schedule_lr_frequency=0,
                           schedule_lr_fraction=10,
                           rgb_max=255.,
                           number_workers=8,
                           number_gpus=-1,
                           no_cuda=False,
                           seed=1,
                           name='run',
                           in_path=in_path,
                           save=out_path,
                           validation_frequency=5,
                           validation_n_batches=-1,
                           render_validation=False,
                           inference=True,
                           inference_visualize=True,
                           inference_size=[-1, -1],
                           inference_batch_size=1,
                           inference_n_batches=-1,
                           save_flow=True,
                           resume='./FlowNet2_checkpoint.pth.tar',
                           log_frequency=1,
                           skip_training=False,
                           skip_validation=False,
                           fp16=False,
                           fp16_scale=1024.,
                           loss='L1Loss',
                           optimizer='Adam',
                           training_dataset='MpiSintelFinal',
                           root='./MPI-Sintel/flow/training',
                           validation_dataset='MpiSintelClean',
                           inference_dataset='MpiSintelClean',
                           IGNORE=False)
    print(args.in_path)

    main_dir = os.path.dirname(os.path.realpath(__file__))
    os.chdir(main_dir)

    # Parse the official arguments
    with tools.TimerBlock("Parsing Arguments") as block:
        if args.number_gpus < 0:
            args.number_gpus = torch.cuda.device_count()

        args.model_class = tools.module_to_dict(models)[args.model]
        args.optimizer_class = tools.module_to_dict(torch.optim)[args.optimizer]
        args.loss_class = tools.module_to_dict(losses)[args.loss]

        args.cuda = not args.no_cuda and torch.cuda.is_available()
        args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip()

        # dict to collect activation gradients (for training debug purpose)
        args.grads = {}

        if args.inference:
            args.skip_validation = True
            args.skip_training = True
            args.total_epochs = 1
            args.inference_dir = "{}/inference".format(args.save)

    print('Source Code')
    print((' Current Git Hash: {}\n'.format(args.current_hash)))

    # Change the title for `top` and `pkill` commands
    setproctitle.setproctitle(args.save)

    # Dynamically load the dataset class with parameters passed in via
    # "--argument_[param]=[value]" arguments
    with tools.TimerBlock("Initializing Datasets") as block:
        args.effective_batch_size = args.batch_size * args.number_gpus
        args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
        args.effective_number_workers = args.number_workers * args.number_gpus
        gpuargs = {'num_workers': args.effective_number_workers,
                   'pin_memory': True,
                   'drop_last': True} if args.cuda else {}
        inf_gpuargs = gpuargs.copy()
        inf_gpuargs['num_workers'] = args.number_workers

        inference_dataset = datasets.ImagesFromFolder(args,
                                                      is_cropped=False,
                                                      is_reversed=args.reverse,
                                                      root=args.in_path)
        block.log('Inference Input: {}'.format(
            ' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]])))
        block.log('Inference Targets: {}'.format(
            ' '.join([str([d for d in x.size()]) for x in inference_dataset[0][1]])))
        inference_loader = DataLoader(inference_dataset,
                                      batch_size=args.effective_inference_batch_size,
                                      shuffle=False,
                                      **inf_gpuargs)

    # Dynamically load model and loss class with parameters passed in via
    # "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
    with tools.TimerBlock("Building {} model".format(args.model)) as block:

        class ModelAndLoss(nn.Module):
            def __init__(self, args):
                super(ModelAndLoss, self).__init__()
                kwargs = tools.kwargs_from_args(args, 'model')
                self.model = args.model_class(args, **kwargs)
                kwargs = tools.kwargs_from_args(args, 'loss')
                self.loss = args.loss_class(args, **kwargs)

            def forward(self, data, target, inference=False):
                output = self.model(data)
                loss_values = self.loss(output, target)
                if not inference:
                    return loss_values
                else:
                    return loss_values, output

        model_and_loss = ModelAndLoss(args)

        # assign to cuda or wrap with dataparallel, model and loss
        if args.cuda and (args.number_gpus > 0) and args.fp16:
            model_and_loss = nn.parallel.DataParallel(
                model_and_loss, device_ids=list(range(args.number_gpus)))
            model_and_loss = model_and_loss.cuda().half()
            torch.cuda.manual_seed(args.seed)

        elif args.cuda and args.number_gpus > 0:
            model_and_loss = model_and_loss.cuda()
            model_and_loss = nn.parallel.DataParallel(
                model_and_loss, device_ids=list(range(args.number_gpus)))
            torch.cuda.manual_seed(args.seed)

        else:
            block.log('CUDA not being used')
            torch.manual_seed(args.seed)

        # Load weights if needed, otherwise randomly initialize
        if args.resume and os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            if not args.inference:
                args.start_epoch = checkpoint['epoch']
            model_and_loss.module.model.load_state_dict(checkpoint['state_dict'])

        elif args.resume and args.inference:
            block.log("No checkpoint found at '{}'".format(args.resume))
            quit()

        else:
            block.log("Random initialization")

        if not os.path.exists(args.save):
            os.makedirs(args.save)

    # Reusable function for inference
    def inference(args, data_loader, model, offset=0):
        model.eval()

        if args.save_flow or args.render_validation:
            flow_folder = out_path  # "./output/flo_rev" if args.reverse else "./output/flo"
            if not os.path.exists(flow_folder):
                os.makedirs(flow_folder)

        # visualization folder
        if args.inference_visualize:
            flow_vis_folder = out_path + "/" + "png/"
            if not os.path.exists(flow_vis_folder):
                os.makedirs(flow_vis_folder)

        args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

        progress = tqdm(data_loader, ncols=100,
                        total=np.minimum(len(data_loader), args.inference_n_batches),
                        desc='Inferencing ', leave=True, position=offset)

        statistics = []
        total_loss = 0
        ph, pw = inference_dataset.ph, inference_dataset.pw

        for batch_idx, (data, target) in enumerate(progress):
            if args.cuda:
                data, target = [d.cuda(non_blocking=True) for d in data], \
                               [t.cuda(non_blocking=True) for t in target]
            data, target = [Variable(d) for d in data], [Variable(t) for t in target]

            # when ground-truth flows are not available for inference_dataset,
            # the targets are set to all zeros. Thus, losses are actually L1 or L2
            # norms of the computed optical flows, depending on the loss norm passed in.
            with torch.no_grad():
                losses, output = model(data[0], target[0], inference=True)

            losses = [torch.mean(loss_value) for loss_value in losses]
            loss_val = losses[0]  # Collect first loss for weight update
            total_loss += loss_val.item()
            loss_values = [v.item() for v in losses]
            statistics.append(loss_values)
            # import IPython; IPython.embed()

            if args.save_flow or args.render_validation:
                for i in range(args.inference_batch_size):
                    _pflow = output[i].data.cpu().numpy().transpose(1, 2, 0)
                    if ph != 0:
                        _pflow = _pflow[ph:-ph, :, :]
                    if pw != 0:
                        _pflow = _pflow[:, pw:-pw, :]
                    flow_utils.writeFlow(
                        join(flow_folder, '%06d.flo' % (batch_idx * args.inference_batch_size + i)),
                        _pflow)

                    # You can comment out the plt block in visulize_flow_file() for real-time visualization
                    if args.inference_visualize:
                        flow_utils.visulize_flow_file(
                            join(flow_folder, '%06d.flo' % (batch_idx * args.inference_batch_size + i)),
                            flow_vis_folder)

            progress.update(1)

            if batch_idx == (args.inference_n_batches - 1):
                break

        progress.close()
        return

    progress = tqdm(list(range(args.start_epoch, args.total_epochs + 1)),
                    miniters=1, ncols=100, desc='Overall Progress', leave=True, position=0)
    offset = 1

    for _ in progress:
        inference(args=args, data_loader=inference_loader, model=model_and_loss, offset=offset)
        offset += 1

    print("\n")
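# Example invocation (illustrative, not from the original sources). infer_flownet() above
# hard-codes resume='./FlowNet2_checkpoint.pth.tar', so that checkpoint must sit next to
# this script. The frame and output paths below are placeholders; the out_path values
# mirror the "./output/flo" / "./output/flo_rev" folders mentioned in the commented-out
# line inside inference().
if __name__ == '__main__':
    infer_flownet(in_path='./frames', out_path='./output/flo', reverse=False)
    infer_flownet(in_path='./frames', out_path='./output/flo_rev', reverse=True)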
tools.add_arguments_for_module(parser, datasets,
                               argument_for_class='inference_dataset',
                               default='MpiSintelClean',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                   'replicates': 1})

main_dir = os.path.dirname(os.path.realpath(__file__))
os.chdir(main_dir)

# Parse the official arguments
with tools.TimerBlock("Parsing Arguments") as block:
    args = parser.parse_args()
    if args.number_gpus < 0:
        args.number_gpus = torch.cuda.device_count()

    # Get argument defaults (hastag #thisisahack)
    parser.add_argument('--IGNORE', action='store_true')
    defaults = vars(parser.parse_args(['--IGNORE']))

    # Print all arguments, color the non-defaults
    for argument, value in sorted(vars(args).items()):
        reset = colorama.Style.RESET_ALL
        color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
        block.log('{}{}: {}{}'.format(color, argument, value, reset))

    args.model_class = tools.module_to_dict(models)[args.model]