def __init__(self, args):
    super(ModelAndLossMultiTwoOutput, self).__init__()
    kwargs = tools.kwargs_from_args(args, 'model')
    self.model = args.model_class(args, **kwargs)
    kwargs = tools.kwargs_from_args(args, 'loss')
    self.loss = args.loss_class(args, **kwargs)
    self.rgb_max = args.rgb_max
def __init__(self, args):
    super(ModelAndLoss, self).__init__()
    kwargs = tools.kwargs_from_args(args, 'model')
    self.model = args.model_class(args, **kwargs)
    if args.inference:
        self.model.eval()
    kwargs = tools.kwargs_from_args(args, 'loss')
    self.loss = args.loss_class(args, **kwargs)
def configure_lr_scheduler(args, optimizer):
    lr_scheduler = None
    with logger.LoggingBlock("Learning Rate Scheduler", emph=True):
        logging.info("class: %s" % args.lr_scheduler)
        if args.lr_scheduler is not None:
            # ----------------------------------------------
            # Figure out lr_scheduler arguments
            # ----------------------------------------------
            kwargs = tools.kwargs_from_args(args, "lr_scheduler")

            # -------------------------------------------
            # Print arguments
            # -------------------------------------------
            for param, default in sorted(kwargs.items()):
                logging.info("%s: %s" % (param, default))

            # -------------------------------------------
            # Add optimizer
            # -------------------------------------------
            kwargs["optimizer"] = optimizer

            # -------------------------------------------
            # Create lr_scheduler instance
            # -------------------------------------------
            lr_scheduler = tools.instance_from_kwargs(args.lr_scheduler_class, kwargs)

    return lr_scheduler
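# A minimal sketch of what the dynamic instantiation above boils down to for one
# concrete scheduler. The milestone/gamma values are made-up assumptions; only
# torch.optim.lr_scheduler.MultiStepLR itself is standard PyTorch.
import torch

model = torch.nn.Linear(2, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# kwargs as they would be collected from "--lr_scheduler_[param]=[value]" arguments
kwargs = {"milestones": [108, 144, 180], "gamma": 0.5}
kwargs["optimizer"] = optimizer
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(**kwargs)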
def demo(parser):
    tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')

    args = parser.parse_args()
    if args.number_gpus < 0:
        args.number_gpus = torch.cuda.device_count()

    kwargs = tools.kwargs_from_args(args, 'model')
    model = tools.module_to_dict(models)[args.model]
    model = model(args, **kwargs)

    args.cuda = not args.no_cuda and torch.cuda.is_available()

    print('Initializing CUDA')
    model = model.cuda()

    print("Loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    model.load_state_dict(checkpoint['state_dict'])
    print("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch']))

    print("Initializing save directory: {}".format(args.save))
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    stats = inference(args=args, model=model)
    print("Demo test done")
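# The helpers used throughout these snippets (tools.module_to_dict,
# tools.kwargs_from_args) come from the repository's tools module. The sketch
# below is an assumption about their behaviour, not the repository's exact
# implementation: module_to_dict maps class names defined in a module to the
# classes themselves, and kwargs_from_args gathers every "--<prefix>_<param>"
# argument into a plain dict keyed by <param>.
import inspect

def module_to_dict(module, exclude=()):
    # Collect every class reachable in `module`, keyed by its name.
    return {name: obj for name, obj in inspect.getmembers(module, inspect.isclass)
            if obj not in exclude}

def kwargs_from_args(args, prefix):
    # Turn e.g. args.model_div_flow into {"div_flow": ...} for prefix "model".
    prefix = prefix + "_"
    return {key[len(prefix):]: value for key, value in vars(args).items()
            if key.startswith(prefix) and key != prefix + "class"}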
def configure_runtime_augmentations(args):
    with logger.LoggingBlock("Runtime Augmentations", emph=True):

        training_augmentation = None
        validation_augmentation = None

        # ----------------------------------------------------
        # Training Augmentation
        # ----------------------------------------------------
        if args.training_augmentation is not None:
            kwargs = tools.kwargs_from_args(args, "training_augmentation")
            logging.info("training_augmentation: %s" % args.training_augmentation)
            for param, default in sorted(kwargs.items()):
                logging.info("  %s: %s" % (param, default))
            kwargs["args"] = args
            training_augmentation = tools.instance_from_kwargs(
                args.training_augmentation_class, kwargs)
            if args.cuda:
                training_augmentation = training_augmentation.cuda()
        else:
            logging.info("training_augmentation: None")

        # ----------------------------------------------------
        # Validation Augmentation
        # ----------------------------------------------------
        if args.validation_augmentation is not None:
            kwargs = tools.kwargs_from_args(args, "validation_augmentation")
            logging.info("validation_augmentation: %s" % args.validation_augmentation)
            for param, default in sorted(kwargs.items()):
                logging.info("  %s: %s" % (param, default))
            kwargs["args"] = args
            validation_augmentation = tools.instance_from_kwargs(
                args.validation_augmentation_class, kwargs)
            if args.cuda:
                validation_augmentation = validation_augmentation.cuda()
        else:
            logging.info("validation_augmentation: None")

    return training_augmentation, validation_augmentation
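# The augmentations configured above are nn.Modules (they are constructed with
# args and moved to CUDA). A minimal sketch of what such a module could look
# like; the example-dict keys "input1"/"input2" are assumptions for
# illustration, and a real flow augmentation would also have to flip and
# sign-correct the flow targets.
import torch
import torch.nn as nn

class RandomHorizontalFlip(nn.Module):
    def __init__(self, args, p=0.5):
        super(RandomHorizontalFlip, self).__init__()
        self._args = args
        self._p = p

    def forward(self, example_dict):
        # Flip both input frames along the width dimension with probability p.
        if torch.rand(1).item() < self._p:
            for key in ("input1", "input2"):
                if key in example_dict:
                    example_dict[key] = torch.flip(example_dict[key], dims=[-1])
        return example_dict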
def configure_optimizer(args, model_and_loss):
    optimizer = None
    with logger.LoggingBlock("Optimizer", emph=True):
        if args.optimizer is not None:
            if model_and_loss.num_parameters() == 0:
                logging.info("No trainable parameters detected.")
                logging.info("Setting optimizer to None.")
            else:
                logging.info(args.optimizer)

                # -------------------------------------------
                # Figure out all optimizer arguments
                # -------------------------------------------
                all_kwargs = tools.kwargs_from_args(args, "optimizer")

                # -------------------------------------------
                # Get the split of param groups
                # -------------------------------------------
                kwargs_without_groups = {
                    key: value for key, value in all_kwargs.items() if key != "group"
                }
                param_groups = all_kwargs["group"]

                # ----------------------------------------------------------------------
                # Print arguments (without groups)
                # ----------------------------------------------------------------------
                for param, default in sorted(kwargs_without_groups.items()):
                    logging.info("%s: %s" % (param, default))

                # ----------------------------------------------------------------------
                # Construct actual optimizer params
                # ----------------------------------------------------------------------
                kwargs = dict(kwargs_without_groups)
                if param_groups is None:
                    # ---------------------------------------------------------
                    # Add all trainable parameters if there is no param groups
                    # ---------------------------------------------------------
                    all_trainable_parameters = _generate_trainable_params(model_and_loss)
                    kwargs["params"] = all_trainable_parameters
                else:
                    # -------------------------------------------
                    # Add list of parameter groups instead
                    # -------------------------------------------
                    trainable_parameter_groups = []
                    dnames, dparams = _param_names_and_trainable_generator(model_and_loss)
                    dnames = set(dnames)
                    dparams = set(list(dparams))
                    with logger.LoggingBlock("parameter_groups:"):
                        for group in param_groups:
                            # log group settings
                            group_match = group["params"]
                            group_args = {
                                key: value for key, value in group.items() if key != "params"
                            }
                            with logger.LoggingBlock("%s: %s" % (group_match, group_args)):
                                # retrieve parameters by matching name
                                gnames, gparams = _param_names_and_trainable_generator(
                                    model_and_loss, match=group_match)
                                # log all names affected
                                for n in sorted(gnames):
                                    logging.info(n)
                                # set generator for group
                                group_args["params"] = gparams
                                # append parameter group
                                trainable_parameter_groups.append(group_args)
                                # update remaining trainable parameters
                                dnames -= set(gnames)
                                dparams -= set(list(gparams))

                        # append default parameter group
                        trainable_parameter_groups.append({"params": list(dparams)})
                        # and log its parameter names
                        with logger.LoggingBlock("default:"):
                            for dname in sorted(dnames):
                                logging.info(dname)

                    # set params in optimizer kwargs
                    kwargs["params"] = trainable_parameter_groups

                # -------------------------------------------
                # Create optimizer instance
                # -------------------------------------------
                optimizer = tools.instance_from_kwargs(args.optimizer_class, kwargs)

    return optimizer
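# What the parameter-group branch above ultimately hands to the optimizer is
# the standard PyTorch "per-group options" form. A minimal, self-contained
# sketch; the layer split and learning rates are made up for illustration.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.Conv2d(16, 2, 3))

optimizer = torch.optim.Adam(
    [
        {"params": model[0].parameters(), "lr": 1e-4},   # explicitly matched group
        {"params": model[1].parameters()},               # default group
    ],
    lr=1e-3,  # used by groups that do not override "lr"
)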
def configure_data_loaders(args):
    with logger.LoggingBlock("Datasets", emph=True):

        def _sizes_to_str(value):
            if np.isscalar(value):
                return '[1L]'
            else:
                return ' '.join([str([d for d in value.size()])])

        def _log_statistics(dataset, prefix, name):
            with logger.LoggingBlock("%s Dataset: %s" % (prefix, name)):
                example_dict = dataset[0]  # get sizes from first dataset example
                for key, value in sorted(example_dict.items()):
                    if key in ["index", "basename"]:  # no need to display these
                        continue
                    if isinstance(value, str):
                        logging.info("{}: {}".format(key, value))
                    else:
                        logging.info("%s: %s" % (key, _sizes_to_str(value)))
                logging.info("num_examples: %i" % len(dataset))

        # -----------------------------------------------------------------------------------------
        # GPU parameters -- turning off pin_memory? for resolving the deadlock?
        # -----------------------------------------------------------------------------------------
        gpuargs = {
            "num_workers": args.num_workers,
            "pin_memory": False
        } if args.cuda else {}

        train_loader = None
        validation_loader = None
        inference_loader = None

        # -----------------------------------------------------------------------------------------
        # Training dataset
        # -----------------------------------------------------------------------------------------
        if args.training_dataset is not None:
            # ----------------------------------------------
            # Figure out training_dataset arguments
            # ----------------------------------------------
            kwargs = tools.kwargs_from_args(args, "training_dataset")
            kwargs["is_cropped"] = True
            kwargs["args"] = args

            # ----------------------------------------------
            # Create training dataset
            # ----------------------------------------------
            train_dataset = tools.instance_from_kwargs(args.training_dataset_class, kwargs)

            # ----------------------------------------------
            # Create training loader
            # ----------------------------------------------
            train_loader = DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      drop_last=False,
                                      **gpuargs)

            _log_statistics(train_dataset, prefix="Training", name=args.training_dataset)

        # -----------------------------------------------------------------------------------------
        # Validation dataset
        # -----------------------------------------------------------------------------------------
        if args.validation_dataset is not None:
            # ----------------------------------------------
            # Figure out validation_dataset arguments
            # ----------------------------------------------
            kwargs = tools.kwargs_from_args(args, "validation_dataset")
            kwargs["is_cropped"] = True
            kwargs["args"] = args

            # ----------------------------------------------
            # Create validation dataset
            # ----------------------------------------------
            validation_dataset = tools.instance_from_kwargs(args.validation_dataset_class, kwargs)

            # ----------------------------------------------
            # Create validation loader
            # ----------------------------------------------
            validation_loader = DataLoader(validation_dataset,
                                           batch_size=args.batch_size_val,
                                           shuffle=False,
                                           drop_last=False,
                                           **gpuargs)

            _log_statistics(validation_dataset, prefix="Validation", name=args.validation_dataset)

    return train_loader, validation_loader, inference_loader
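# _log_statistics above assumes each dataset example is a dict of tensors plus
# a few metadata fields. A minimal sketch of such a dataset; the field names
# "input1", "target1", "index", "basename" are assumptions for illustration.
import torch
from torch.utils.data import Dataset, DataLoader

class DictExampleDataset(Dataset):
    def __init__(self, num_examples=4):
        self.num_examples = num_examples

    def __len__(self):
        return self.num_examples

    def __getitem__(self, idx):
        # One example: an image tensor, a flow target, and metadata.
        return {
            "input1": torch.zeros(3, 64, 64),
            "target1": torch.zeros(2, 64, 64),
            "index": idx,
            "basename": "frame_%04d" % idx,
        }

loader = DataLoader(DictExampleDataset(), batch_size=2, shuffle=True)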
def configure_model_and_loss(args):
    # ----------------------------------------------------
    # Dynamically load model and loss class with parameters
    # passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
    # ----------------------------------------------------
    with logger.LoggingBlock("Model and Loss", emph=True):

        # ----------------------------------------------------
        # Model
        # ----------------------------------------------------
        kwargs = tools.kwargs_from_args(args, "model")
        kwargs["args"] = args
        if type(args.checkpoint) == list and len(args.checkpoint) > 1:
            models = nn.ModuleList([
                tools.instance_from_kwargs(args.model_class, kwargs)
                for _ in args.checkpoint
            ])
        else:
            models = tools.instance_from_kwargs(args.model_class, kwargs)

        if hasattr(args, 'avoid_list') and args.avoid_list:
            for model in models:
                model.avoid_list = args.avoid_list.split(',')

        # ----------------------------------------------------
        # Training loss
        # ----------------------------------------------------
        training_loss = None
        if args.training_loss is not None:
            kwargs = tools.kwargs_from_args(args, "training_loss")
            kwargs["args"] = args
            training_loss = tools.instance_from_kwargs(args.training_loss_class, kwargs)

        # ----------------------------------------------------
        # Validation loss
        # ----------------------------------------------------
        validation_loss = None
        if args.validation_loss is not None:
            kwargs = tools.kwargs_from_args(args, "validation_loss")
            kwargs["args"] = args
            validation_loss = tools.instance_from_kwargs(args.validation_loss_class, kwargs)

        # ----------------------------------------------------
        # Model and loss
        # ----------------------------------------------------
        model_and_loss = ModelAndLoss(args, models, training_loss, validation_loss)

        # -----------------------------------------------------------
        # If Cuda, transfer model to Cuda and wrap with DataParallel.
        # -----------------------------------------------------------
        if args.cuda:
            model_and_loss = model_and_loss.cuda()

        # ---------------------------------------------------------------
        # Report some network statistics
        # ---------------------------------------------------------------
        logging.info("Batch Size: %i" % args.batch_size)
        logging.info("GPGPU: Cuda" if args.cuda else "GPGPU: off")
        logging.info("Network: %s" % args.model)
        logging.info("Number of parameters: %i" %
                     tools.x2module(model_and_loss).num_parameters())
        if training_loss is not None:
            logging.info("Training Key: %s" % args.training_key)
            logging.info("Training Loss: %s" % args.training_loss)
        if validation_loss is not None:
            logging.info("Validation Key: %s" % args.validation_key)
            logging.info("Validation Loss: %s" % args.validation_loss)

    return model_and_loss
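# The ModelAndLoss wrapper used above is not defined in this excerpt. A minimal
# sketch of what its call site implies; the constructor signature is taken from
# the call above, while the forward contract (dict in, loss dict and output
# dict out) is an assumption for illustration.
import torch.nn as nn

class ModelAndLoss(nn.Module):
    def __init__(self, args, model, training_loss, validation_loss):
        super(ModelAndLoss, self).__init__()
        self._args = args
        self._model = model
        self._training_loss = training_loss
        self._validation_loss = validation_loss

    def num_parameters(self):
        # Count only trainable parameters, as configure_optimizer expects.
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

    def forward(self, example_dict, training=True):
        output_dict = self._model(example_dict)
        loss = self._training_loss if training else self._validation_loss
        loss_dict = loss(output_dict, example_dict) if loss is not None else {}
        return loss_dict, output_dict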
# Change the title for `top` and `pkill` commands
setproctitle.setproctitle(args.save)

# Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
with tools.TimerBlock("Initializing Datasets") as block:
    args.effective_batch_size = args.batch_size * args.number_gpus
    args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
    args.effective_number_workers = args.number_workers * args.number_gpus
    gpuargs = {'num_workers': args.effective_number_workers,
               'pin_memory': True,
               'drop_last': True} if args.cuda else {}
    inf_gpuargs = gpuargs.copy()
    inf_gpuargs['num_workers'] = args.number_workers

    if exists(args.training_dataset_root):
        train_dataset = args.training_dataset_class(args, True, **tools.kwargs_from_args(args, 'training_dataset'))
        block.log('Training Dataset: {}'.format(args.training_dataset))
        block.log('Training Input: {}'.format(' '.join([str([d for d in x.size()]) for x in train_dataset[0][0]])))
        block.log('Training Targets: {}'.format(' '.join([str([d for d in x.size()]) for x in train_dataset[0][1]])))
        train_loader = DataLoader(train_dataset, batch_size=args.effective_batch_size, shuffle=True, **gpuargs)

    if exists(args.validation_dataset_root):
        validation_dataset = args.validation_dataset_class(args, True, **tools.kwargs_from_args(args, 'validation_dataset'))
        block.log('Validation Dataset: {}'.format(args.validation_dataset))
        block.log('Validation Input: {}'.format(' '.join([str([d for d in x.size()]) for x in validation_dataset[0][0]])))
def __init__(self, args):
    super(Model, self).__init__()
    kwargs = tools.kwargs_from_args(args, 'model')
    self.model = args.model_class(args, **kwargs)
tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2S')

# Parse the official arguments
with tools.TimerBlock("Parsing Arguments") as block:
    args = parser.parse_args()
    args.model_class = tools.module_to_dict(models)[args.model]

# Dynamically load model with parameters passed in via "--model_[param]=[value]" arguments
with tools.TimerBlock("Building {} model".format(args.model)) as block:
    kwargs = tools.kwargs_from_args(args, 'model')
    model = args.model_class(args, **kwargs)
    block.log('Number of parameters: {}'.format(
        sum([p.data.nelement() if p.requires_grad else 0 for p in model.parameters()])))

    # Load weights if needed, otherwise randomly initialize
    if args.resume and os.path.isfile(args.resume):
        block.log("Loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        best_err = checkpoint['best_EPE']
        model.load_state_dict(checkpoint['state_dict'])
        block.log("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch']))
args.inference_dir = "{}/inference".format(args.save)

print('Source Code')
print(' Current Git Hash: {}\n'.format(args.current_hash))

# Change the title for `top` and `pkill` commands
setproctitle.setproctitle(args.save)

# Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
with tools.TimerBlock("Initializing Datasets") as block:
    args.effective_batch_size = args.batch_size
    args.batch_size = args.effective_batch_size // args.number_gpus
    gpuargs = {'num_workers': args.number_workers, 'pin_memory': True} if args.cuda else {}

    if exists(args.training_dataset_root):
        train_dataset = args.training_dataset_class(args, True, **tools.kwargs_from_args(args, 'training_dataset'))
        block.log('Training Dataset: {}'.format(args.training_dataset))
        block.log('Training Input: {}'.format(' '.join([str([d for d in x.size()]) for x in train_dataset[0][0]])))
        block.log('Training Targets: {}'.format(' '.join([str([d for d in x.size()]) for x in train_dataset[0][1]])))
        train_loader = DataLoader(train_dataset, batch_size=args.effective_batch_size, shuffle=True, **gpuargs)

    if exists(args.validation_dataset_root):
        validation_dataset = args.validation_dataset_class(args, True, **tools.kwargs_from_args(args, 'validation_dataset'))
        block.log('Validation Dataset: {}'.format(args.validation_dataset))
        block.log('Validation Input: {}'.format(' '.join([str([d for d in x.size()]) for x in validation_dataset[0][0]])))
        block.log('Validation Targets: {}'.format(' '.join([str([d for d in x.size()]) for x in validation_dataset[0][1]])))
        validation_loader = DataLoader(validation_dataset, batch_size=args.effective_batch_size, shuffle=False, **gpuargs)

    if exists(args.inference_dataset_root):
        inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset'))
        block.log('Inference Dataset: {}'.format(args.inference_dataset))
def __init__(self, args):
    super(ModelAndLoss, self).__init__()
    kwargs = tools.kwargs_from_args(args, "model")
    self.model = args.model_class(args, **kwargs)
    kwargs = tools.kwargs_from_args(args, "loss")
    self.loss = args.loss_class(args, **kwargs)
# Change the title for `top` and `pkill` commands
setproctitle.setproctitle(args.save)

# Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
with tools.TimerBlock("Initializing Datasets") as block:
    args.effective_batch_size = args.batch_size * args.number_gpus
    args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
    args.effective_number_workers = args.number_workers * args.number_gpus
    gpuargs = (
        {"num_workers": args.effective_number_workers, "pin_memory": True, "drop_last": True}
        if args.cuda
        else {}
    )
    inf_gpuargs = gpuargs.copy()
    inf_gpuargs["num_workers"] = args.number_workers

    if exists(args.training_dataset_root):
        train_dataset = args.training_dataset_class(args, True, **tools.kwargs_from_args(args, "training_dataset"))
        block.log("Training Dataset: {}".format(args.training_dataset))
        block.log("Training Input: {}".format(" ".join([str([d for d in x.size()]) for x in train_dataset[0][0]])))
        block.log("Training Targets: {}".format(" ".join([str([d for d in x.size()]) for x in train_dataset[0][1]])))
        train_loader = DataLoader(train_dataset, batch_size=args.effective_batch_size, shuffle=True, **gpuargs)

    if exists(args.validation_dataset_root):
        validation_dataset = args.validation_dataset_class(
            args, True, **tools.kwargs_from_args(args, "validation_dataset")
        )
        block.log("Validation Dataset: {}".format(args.validation_dataset))
        block.log("Validation Input: {}".format(" ".join([str([d for d in x.size()]) for x in validation_dataset[0][0]])))
# Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
with tools.TimerBlock("Initializing Datasets") as block:
    args.effective_batch_size = args.batch_size * args.number_gpus
    args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
    args.effective_number_workers = args.number_workers * args.number_gpus
    gpuargs = {
        'num_workers': args.effective_number_workers,
        'pin_memory': True,
        'drop_last': True
    } if args.cuda else {}
    inf_gpuargs = gpuargs.copy()
    inf_gpuargs['num_workers'] = args.number_workers

    if exists(args.training_dataset_root):
        train_dataset = args.training_dataset_class(
            args, True, **tools.kwargs_from_args(args, 'training_dataset'))
        block.log('Training Dataset: {}'.format(args.training_dataset))
        block.log('Training Input: {}'.format(' '.join(
            [str([d for d in x.size()]) for x in train_dataset[0][0]])))
        block.log('Training Targets: {}'.format(' '.join(
            [str([d for d in x.size()]) for x in train_dataset[0][1]])))
        train_loader = DataLoader(train_dataset,
                                  batch_size=args.effective_batch_size,
                                  shuffle=True,
                                  **gpuargs)

    if exists(args.validation_dataset_root):
        validation_dataset = args.validation_dataset_class(
            args, True, **tools.kwargs_from_args(args, 'validation_dataset'))
        block.log('Validation Dataset: {}'.format(args.validation_dataset))
def initialize_args():
    if __name__ == '__main__':
        parser = argparse.ArgumentParser()

        parser.add_argument('--start_epoch', type=int, default=1)
        parser.add_argument('--total_epochs', type=int, default=10000)
        parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size")
        parser.add_argument('--train_n_batches', type=int, default=-1,
                            help='Number of mini-batches per epoch. If < 0, it will be determined by training_dataloader')
        parser.add_argument('--crop_size', type=int, nargs='+', default=[256, 256],
                            help="Spatial dimension to crop training samples for training")
        parser.add_argument('--gradient_clip', type=float, default=None)
        parser.add_argument('--schedule_lr_frequency', type=int, default=0,
                            help='in number of iterations (0 for no schedule)')
        parser.add_argument('--schedule_lr_fraction', type=float, default=10)
        parser.add_argument("--rgb_max", type=float, default=255.)

        parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8)
        parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use')
        parser.add_argument('--no_cuda', action='store_true')

        parser.add_argument('--seed', type=int, default=1)
        parser.add_argument('--name', default='run', type=str, help='a name to append to the save directory')
        parser.add_argument('--save', '-s', default='./work', type=str, help='directory for saving')

        parser.add_argument('--validation_frequency', type=int, default=5, help='validate every n epochs')
        parser.add_argument('--validation_n_batches', type=int, default=-1)
        parser.add_argument('--render_validation', action='store_true',
                            help='run inference (save flows to file) every validation_frequency epochs')

        parser.add_argument('--inference', action='store_true')
        parser.add_argument('--inference_size', type=int, nargs='+', default=[-1, -1],
                            help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used')
        parser.add_argument('--inference_batch_size', type=int, default=1)
        parser.add_argument('--inference_n_batches', type=int, default=-1)
        parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file')

        parser.add_argument('--resume', default='', type=str, metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches")

        parser.add_argument('--skip_training', action='store_true')
        parser.add_argument('--skip_validation', action='store_true')

        parser.add_argument('--fp16', action='store_true',
                            help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
        parser.add_argument('--fp16_scale', type=float, default=1024.,
                            help='Loss scaling, positive power of 2 values can improve fp16 convergence.')

        tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')

        tools.add_arguments_for_module(parser, losses, argument_for_class='loss', default='L1Loss')

        tools.add_arguments_for_module(parser, torch.optim, argument_for_class='optimizer', default='Adam',
                                       skip_params=['params'])

        tools.add_arguments_for_module(parser, datasets, argument_for_class='training_dataset',
                                       default='MpiSintelFinal',
                                       skip_params=['is_cropped'],
                                       parameter_defaults={'root': './MPI-Sintel/flow/training'})

        tools.add_arguments_for_module(parser, datasets, argument_for_class='validation_dataset',
                                       default='MpiSintelClean',
                                       skip_params=['is_cropped'],
                                       parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                           'replicates': 1})

        tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset',
                                       default='MpiSintelClean',
                                       skip_params=['is_cropped'],
                                       parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                           'replicates': 1})

        main_dir = os.path.dirname(os.path.realpath(__file__))
        os.chdir(main_dir)

        # Parse the official arguments
        with tools.TimerBlock("Parsing Arguments") as block:
            args = parser.parse_args()
            if args.number_gpus < 0:
                args.number_gpus = torch.cuda.device_count()

            # Get argument defaults (hashtag #thisisahack)
            parser.add_argument('--IGNORE', action='store_true')
            defaults = vars(parser.parse_args(['--IGNORE']))

            # Print all arguments, color the non-defaults
            for argument, value in sorted(vars(args).items()):
                reset = colorama.Style.RESET_ALL
                color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
                block.log('{}{}: {}{}'.format(color, argument, value, reset))

            args.model_class = tools.module_to_dict(models)[args.model]
            args.optimizer_class = tools.module_to_dict(torch.optim)[args.optimizer]
            args.loss_class = tools.module_to_dict(losses)[args.loss]
            args.training_dataset_class = tools.module_to_dict(datasets)[args.training_dataset]
            args.validation_dataset_class = tools.module_to_dict(datasets)[args.validation_dataset]
            args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset]

            args.cuda = not args.no_cuda and torch.cuda.is_available()
            args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip()
            args.log_file = join(args.save, 'args.txt')

            # dict to collect activation gradients (for training debug purpose)
            args.grads = {}

            if args.inference:
                args.skip_validation = True
                args.skip_training = True
                args.total_epochs = 1
                args.inference_dir = "{}/inference".format(args.save)

        print('Source Code')
        print(' Current Git Hash: {}\n'.format(args.current_hash))

        # Change the title for `top` and `pkill` commands
        setproctitle.setproctitle(args.save)

        # Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
        with tools.TimerBlock("Initializing Datasets") as block:
            args.effective_batch_size = args.batch_size * args.number_gpus
            args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
            args.effective_number_workers = args.number_workers * args.number_gpus
            gpuargs = {'num_workers': args.effective_number_workers,
                       'pin_memory': True,
                       'drop_last': True} if args.cuda else {}
            inf_gpuargs = gpuargs.copy()
            inf_gpuargs['num_workers'] = args.number_workers

            if exists(args.training_dataset_root):
                train_dataset = args.training_dataset_class(args, True, **tools.kwargs_from_args(args, 'training_dataset'))
                block.log('Training Dataset: {}'.format(args.training_dataset))
                block.log('Training Input: {}'.format(' '.join([str([d for d in x.size()]) for x in train_dataset[0][0]])))
                block.log('Training Targets: {}'.format(' '.join([str([d for d in x.size()]) for x in train_dataset[0][1]])))
                train_loader = DataLoader(train_dataset, batch_size=args.effective_batch_size, shuffle=True, **gpuargs)

            if exists(args.validation_dataset_root):
                validation_dataset = args.validation_dataset_class(args, True, **tools.kwargs_from_args(args, 'validation_dataset'))
                block.log('Validation Dataset: {}'.format(args.validation_dataset))
                block.log('Validation Input: {}'.format(' '.join([str([d for d in x.size()]) for x in validation_dataset[0][0]])))
                block.log('Validation Targets: {}'.format(' '.join([str([d for d in x.size()]) for x in validation_dataset[0][1]])))
                validation_loader = DataLoader(validation_dataset, batch_size=args.effective_batch_size, shuffle=False, **gpuargs)

            if exists(args.inference_dataset_root):
                inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset'))
                block.log('Inference Dataset: {}'.format(args.inference_dataset))
                block.log('Inference Input: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]])))
                block.log('Inference Targets: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][1]])))
                inference_loader = DataLoader(inference_dataset, batch_size=args.effective_inference_batch_size, shuffle=False, **inf_gpuargs)

        # Dynamically load model and loss class with parameters passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
        with tools.TimerBlock("Building {} model".format(args.model)) as block:

            class ModelAndLoss(nn.Module):
                def __init__(self, args):
                    super(ModelAndLoss, self).__init__()
                    kwargs = tools.kwargs_from_args(args, 'model')
                    self.model = args.model_class(args, **kwargs)
                    kwargs = tools.kwargs_from_args(args, 'loss')
                    self.loss = args.loss_class(args, **kwargs)

                def forward(self, data, target, inference=False):
                    output = self.model(data)
                    loss_values = self.loss(output, target)
                    if not inference:
                        return loss_values
                    else:
                        return loss_values, output

            model_and_loss = ModelAndLoss(args)

            block.log('Effective Batch Size: {}'.format(args.effective_batch_size))
            block.log('Number of parameters: {}'.format(
                sum([p.data.nelement() if p.requires_grad else 0 for p in model_and_loss.parameters()])))

            # assign model and loss to CUDA, or wrap with DataParallel
            if args.cuda and (args.number_gpus > 0) and args.fp16:
                block.log('Parallelizing')
                model_and_loss = nn.parallel.DataParallel(model_and_loss, device_ids=list(range(args.number_gpus)))

                block.log('Initializing CUDA')
                model_and_loss = model_and_loss.cuda().half()
                torch.cuda.manual_seed(args.seed)
                param_copy = [param.clone().type(torch.cuda.FloatTensor).detach()
                              for param in model_and_loss.parameters()]

            elif args.cuda and args.number_gpus > 0:
                block.log('Initializing CUDA')
                model_and_loss = model_and_loss.cuda()
                block.log('Parallelizing')
                model_and_loss = nn.parallel.DataParallel(model_and_loss, device_ids=list(range(args.number_gpus)))
                torch.cuda.manual_seed(args.seed)

            else:
                block.log('CUDA not being used')
                torch.manual_seed(args.seed)

            # Load weights if needed, otherwise randomly initialize
            if args.resume and os.path.isfile(args.resume):
                block.log("Loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume)
                if not args.inference:
                    args.start_epoch = checkpoint['epoch']
                best_err = checkpoint['best_EPE']
                model_and_loss.module.model.load_state_dict(checkpoint['state_dict'])
                block.log("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch']))
            elif args.resume and args.inference:
                block.log("No checkpoint found at '{}'".format(args.resume))
                quit()

            else:
                block.log("Random initialization")

            block.log("Initializing save directory: {}".format(args.save))
            if not os.path.exists(args.save):
                os.makedirs(args.save)

            train_logger = SummaryWriter(log_dir=os.path.join(args.save, 'train'), comment='training')
            validation_logger = SummaryWriter(log_dir=os.path.join(args.save, 'validation'), comment='validation')

        # Dynamically load the optimizer with parameters passed in via "--optimizer_[param]=[value]" arguments
        with tools.TimerBlock("Initializing {} Optimizer".format(args.optimizer)) as block:
            kwargs = tools.kwargs_from_args(args, 'optimizer')
            if args.fp16:
                optimizer = args.optimizer_class([p for p in param_copy if p.requires_grad], **kwargs)
            else:
                optimizer = args.optimizer_class([p for p in model_and_loss.parameters() if p.requires_grad], **kwargs)
            for param, default in list(kwargs.items()):
                block.log("{} = {} ({})".format(param, default, type(default)))

        # Log all arguments to file
        for argument, value in sorted(vars(args).items()):
            block.log2file(args.log_file, '{}: {}'.format(argument, value))

        return args
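# The fp16 branch above keeps an fp32 master copy of the weights (param_copy)
# and builds the optimizer over that copy. A self-contained sketch of the
# loss-scaled update this implies; the 1024.0 scale mirrors the --fp16_scale
# default, and the tiny model and random data are placeholders. This is an
# illustration of the pattern, not the repository's exact training step.
import torch
import torch.nn as nn

if torch.cuda.is_available():
    model = nn.Linear(8, 2).cuda().half()
    param_copy = [p.clone().float().detach().requires_grad_(True) for p in model.parameters()]
    optimizer = torch.optim.Adam(param_copy, lr=1e-4)
    scale = 1024.0

    data = torch.randn(4, 8, device='cuda', dtype=torch.half)
    target = torch.randn(4, 2, device='cuda', dtype=torch.half)

    loss = nn.functional.l1_loss(model(data), target)
    model.zero_grad()
    (loss * scale).backward()              # scale so fp16 gradients stay representable

    for master, param in zip(param_copy, model.parameters()):
        master.grad = param.grad.detach().float() / scale   # unscale into the fp32 copy

    optimizer.step()                       # update the fp32 master weights

    for param, master in zip(model.parameters(), param_copy):
        param.data.copy_(master.data)      # copy updated weights back into the fp16 model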
# Change the title for `top` and `pkill` commands
setproctitle.setproctitle(args.save)

# Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
with tools.TimerBlock("Initializing Datasets") as block:
    args.effective_batch_size = args.batch_size
    args.batch_size = args.effective_batch_size // args.number_gpus
    gpuargs = {
        'num_workers': args.number_workers,
        'pin_memory': True
    } if args.cuda else {}

    if exists(args.training_dataset_root):
        train_dataset = args.training_dataset_class(
            args, True, **tools.kwargs_from_args(args, 'training_dataset'))
        block.log('Training Dataset: {}'.format(args.training_dataset))
        block.log('Training Input: {}'.format(' '.join(
            [str([d for d in x.size()]) for x in train_dataset[0][0]])))
        block.log('Training Targets: {}'.format(' '.join(
            [str([d for d in x.size()]) for x in train_dataset[0][1]])))
        train_loader = DataLoader(train_dataset,
                                  batch_size=args.effective_batch_size,
                                  shuffle=True,
                                  **gpuargs)

    if exists(args.validation_dataset_root):
        validation_dataset = args.validation_dataset_class(
            args, True, **tools.kwargs_from_args(args, 'validation_dataset'))
        block.log('Validation Dataset: {}'.format(args.validation_dataset))
gpuargs = {'num_workers': args.effective_number_workers,
           'pin_memory': True,
           'drop_last': True} if args.cuda else {}
inf_gpuargs = gpuargs.copy()
inf_gpuargs['num_workers'] = args.number_workers

block.log('Inference Dataset: {}'.format(args.inference_dataset))
dataset_root = args.inference_dataset_root
data_name = sorted(os.listdir(dataset_root))
block.log(data_name)

inference_loaders = {}
for i in range(len(data_name)):
    dataset_path = os.path.join(dataset_root, data_name[i])
    args.inference_dataset_root = dataset_path
    inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset'))
    inference_loaders[dataset_path] = DataLoader(inference_dataset,
                                                 batch_size=args.effective_inference_batch_size,
                                                 shuffle=False,
                                                 **inf_gpuargs)
    block.log('Inference Input: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]])))

# Dynamically load model and loss class with parameters passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
with tools.TimerBlock("Building {} model".format(args.model)) as block:

    class Model(nn.Module):
        def __init__(self, args):
            super(Model, self).__init__()
            kwargs = tools.kwargs_from_args(args, 'model')
            self.model = args.model_class(args, **kwargs)

        def forward(self, data):
            output = self.model(data)
            return output
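# A minimal usage sketch for driving the Model wrapper above at inference time,
# assuming a constructed `model` instance and parsed `args` as in the snippet.
# The input layout [batch, 3, 2, H, W] (two RGB frames stacked on dim 2)
# follows the FlowNet2 convention; treat it as an assumption for other model
# classes, as is the [1, 2, H, W] flow output shape.
import torch

model.eval()
with torch.no_grad():
    frames = torch.zeros(1, 3, 2, 384, 1024)   # placeholder image pair
    if args.cuda:
        frames = frames.cuda()
    flow = model(frames)                        # predicted flow, e.g. [1, 2, H, W]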