def setup_optimizers(self):
    train_opt = self.opt['train']
    optim_params = []
    for k, v in self.net_g.named_parameters():
        if v.requires_grad:
            optim_params.append(v)
        else:
            logger = get_root_logger()
            logger.warning(f'Params {k} will not be optimized.')
    # optimizer g
    optim_type = train_opt['optim_g'].pop('type')
    if optim_type == 'Adam':
        self.optimizer_g = torch.optim.Adam(optim_params,
                                            **train_opt['optim_g'])
    else:
        raise NotImplementedError(
            f'optimizer {optim_type} is not supported yet.')
    self.optimizers.append(self.optimizer_g)
    # optimizer d
    optim_type = train_opt['optim_d'].pop('type')
    if optim_type == 'Adam':
        self.optimizer_d = torch.optim.Adam(self.net_d.parameters(),
                                            **train_opt['optim_d'])
    else:
        raise NotImplementedError(
            f'optimizer {optim_type} is not supported yet.')
    self.optimizers.append(self.optimizer_d)
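A minimal sketch of the 'train' block this method expects. Each optimizer sub-dict has its 'type' popped and the remaining keys unpacked into torch.optim.Adam, so they must be valid Adam keyword arguments; the lr and betas values below are illustrative, not taken from this codebase. Note that pop('type') mutates the config in place, so the method should only be called once per loaded config.

# Hypothetical config snippet: 'type' is popped, the rest goes to Adam(**kwargs).
train_opt_example = {
    'optim_g': {'type': 'Adam', 'lr': 1e-4, 'betas': (0.9, 0.99)},
    'optim_d': {'type': 'Adam', 'lr': 1e-4, 'betas': (0.9, 0.99)},
}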
def create_dataset(dataset_opt):
    """Create dataset.

    Args:
        dataset_opt (dict): Configuration for dataset. It contains:
            name (str): Dataset name.
            type (str): Dataset type.
    """
    dataset_type = dataset_opt['type']

    # dynamic instantiation
    for module in _dataset_modules:
        dataset_cls = getattr(module, dataset_type, None)
        if dataset_cls is not None:
            break
    if dataset_cls is None:
        raise ValueError(f'Dataset {dataset_type} is not found.')

    dataset = dataset_cls(dataset_opt)

    logger = get_root_logger()
    logger.info(
        f'Dataset {dataset.__class__.__name__} - {dataset_opt["name"]} '
        'is created.')
    return dataset
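A hedged usage sketch: the 'type' value must match a dataset class name exported by one of the scanned _dataset_modules; the class name and path keys below are illustrative assumptions, since the concrete dataset classes are not shown in this section. The getattr scan over _dataset_modules acts as a lightweight registry, so adding a dataset only requires defining a class in one of those modules.

# Hypothetical options dict; 'type' must name a class found in _dataset_modules.
dataset_opt = {
    'name': 'example_val_set',
    'type': 'PairedImageDataset',          # assumed class name, for illustration
    'phase': 'val',
    'dataroot_gt': 'datasets/example/gt',  # illustrative paths
    'dataroot_lq': 'datasets/example/lq',
}
val_set = create_dataset(dataset_opt)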
def main():
    """main"""
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
        # torch.backends.cudnn.deterministic = True

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config: {}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_recognizer(cfg.model)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:  # [('train', 5), ('val', 1)]
        datasets.append(build_dataset(cfg.data.val))
    # if cfg.checkpoint_config is not None:
    #     # save mmaction version, config file content and class names in
    #     # checkpoints as meta data
    #     cfg.checkpoint_config.meta = dict(
    #         mmaction_version=__version__,
    #         config=cfg.text,
    #     )
    # add an attribute for visualization convenience
    train_network(model,
                  datasets,
                  cfg,
                  distributed=distributed,
                  validate=args.validate,
                  logger=logger)
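A quick worked example of the linear scaling rule applied above, with illustrative numbers only: the configured learning rate is treated as tuned for 8 GPUs, so running on fewer GPUs scales it down proportionally.

# Illustrative values, not from any config in this repo.
base_lr, gpus = 0.01, 4
scaled_lr = base_lr * gpus / 8  # 0.01 * 4 / 8 = 0.005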
def _log_validation_metric_values(self, current_iter, dataset_name,
                                  tb_logger):
    log_str = f'Validation {dataset_name}\n'
    for metric, value in self.metric_results.items():
        log_str += f'\t # {metric}: {value:.4f}\n'
    logger = get_root_logger()
    logger.info(log_str)
    if tb_logger:
        for metric, value in self.metric_results.items():
            tb_logger.add_scalar(f'metrics/{metric}', value, current_iter)
def forward(self, x, feat):
    out = self.conv_offset(feat)
    o1, o2, mask = torch.chunk(out, 3, dim=1)
    offset = torch.cat((o1, o2), dim=1)
    mask = torch.sigmoid(mask)

    offset_absmean = torch.mean(torch.abs(offset))
    if offset_absmean > 50:
        logger = get_root_logger()
        logger.warning(
            f'Offset abs mean is {offset_absmean}, larger than 50.')

    return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
                                 self.stride, self.padding, self.dilation,
                                 self.groups, self.deformable_groups)
def init_loggers(opt):
    log_file = osp.join(opt['path']['log'],
                        f"train_{opt['name']}_{get_time_str()}.log")
    logger = get_root_logger(logger_name='codes',
                             log_level=logging.INFO,
                             log_file=log_file)
    # logger.info(get_env_info())
    logger.info(dict2str(opt))

    # initialize wandb logger before tensorboard logger to allow proper sync
    if (opt['logger'].get('wandb') is not None) and (
            opt['logger']['wandb'].get('project') is not None) and (
                'debug' not in opt['name']):
        assert opt['logger'].get('use_tb_logger') is True, (
            'should turn on tensorboard when using wandb')
        init_wandb_logger(opt)
    tb_logger = None
    if opt['logger'].get('use_tb_logger') and 'debug' not in opt['name']:
        tb_logger = init_tb_logger(log_dir=osp.join('tb_logger', opt['name']))
    return logger, tb_logger
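A hedged example of the 'logger' section this helper reads: wandb is only initialized when a project is set and tensorboard logging is enabled, and both are skipped for experiments whose name contains 'debug'. The project name below is purely illustrative.

# Hypothetical logger section of the options dict.
opt_logger_example = {
    'use_tb_logger': True,
    'wandb': {'project': 'example_project'},  # illustrative project name
}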
def main():
    # parse options, set distributed setting, set random seed
    opt = parse_options(is_train=False)

    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    # mkdir and initialize loggers
    make_exp_dirs(opt)
    log_file = osp.join(opt['path']['log'],
                        f"test_{opt['name']}_{get_time_str()}.log")
    logger = get_root_logger(logger_name='codes',
                             log_level=logging.INFO,
                             log_file=log_file)
    logger.info(dict2str(opt))

    # create test dataset and dataloader
    test_loaders = []
    for phase, dataset_opt in sorted(opt['datasets'].items()):
        test_set = create_dataset(dataset_opt)
        test_loader = create_dataloader(test_set,
                                        dataset_opt,
                                        num_gpu=opt['num_gpu'],
                                        dist=opt['dist'],
                                        sampler=None,
                                        seed=opt['manual_seed'])
        logger.info(
            f"Number of test images in {dataset_opt['name']}: {len(test_set)}")
        test_loaders.append(test_loader)

    # create model
    model = create_model(opt)

    for test_loader in test_loaders:
        test_set_name = test_loader.dataset.opt['name']
        logger.info(f'Testing {test_set_name}...')
        model.validation(test_loader,
                         current_iter=opt['name'],
                         tb_logger=None,
                         save_img=opt['val']['save_img'])
def create_model(opt):
    """Create model.

    Args:
        opt (dict): Configuration. It contains:
            model_type (str): Model type.
    """
    model_type = opt['model_type']

    # dynamic instantiation
    for module in _model_modules:
        model_cls = getattr(module, model_type, None)
        if model_cls is not None:
            break
    if model_cls is None:
        raise ValueError(f'Model {model_type} is not found.')

    model = model_cls(opt)

    logger = get_root_logger()
    logger.info(f'Model [{model.__class__.__name__}] is created.')
    return model
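A usage sketch under stated assumptions: 'model_type' must name a class found in one of the scanned _model_modules, and the class name below is assumed for illustration. A real model class will typically require additional sections (network definitions, paths, train/val settings) that are omitted here, so this shows the lookup pattern rather than a complete configuration.

# Hypothetical top-level options; 'model_type' must name a class in _model_modules.
opt = {
    'model_type': 'SRModel',  # assumed class name, for illustration only
    'is_train': False,
    'num_gpu': 1,
    # further model-specific sections would go here
}
model = create_model(opt)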
def dist_validation(self, dataloader, current_iter, tb_logger, save_img):
    logger = get_root_logger()
    logger.info('Only support single GPU validation.')
    self.nondist_validation(dataloader, current_iter, tb_logger, save_img)
def create_dataloader(dataset,
                      dataset_opt,
                      num_gpu=1,
                      dist=False,
                      sampler=None,
                      seed=None):
    """Create dataloader.

    Args:
        dataset (torch.utils.data.Dataset): Dataset.
        dataset_opt (dict): Dataset options. It contains the following keys:
            phase (str): 'train' or 'val'.
            num_worker_per_gpu (int): Number of workers for each GPU.
            batch_size_per_gpu (int): Training batch size for each GPU.
        num_gpu (int): Number of GPUs. Used only in the train phase.
            Default: 1.
        dist (bool): Whether in distributed training. Used only in the train
            phase. Default: False.
        sampler (torch.utils.data.sampler): Data sampler. Default: None.
        seed (int | None): Seed. Default: None.
    """
    phase = dataset_opt['phase']
    rank, _ = get_dist_info()
    if phase == 'train':
        if dist:  # distributed training
            batch_size = dataset_opt['batch_size_per_gpu']
            num_workers = dataset_opt['num_worker_per_gpu']
        else:  # non-distributed training
            multiplier = 1 if num_gpu == 0 else num_gpu
            batch_size = dataset_opt['batch_size_per_gpu'] * multiplier
            num_workers = dataset_opt['num_worker_per_gpu'] * multiplier
        dataloader_args = dict(dataset=dataset,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=num_workers,
                               sampler=sampler,
                               drop_last=True)
        if sampler is None:
            dataloader_args['shuffle'] = True
        dataloader_args['worker_init_fn'] = partial(
            worker_init_fn, num_workers=num_workers, rank=rank,
            seed=seed) if seed is not None else None
    elif phase in ['val', 'test']:  # validation
        dataloader_args = dict(dataset=dataset,
                               batch_size=1,
                               shuffle=False,
                               num_workers=0)
    else:
        raise ValueError(f'Wrong dataset phase: {phase}. '
                         "Supported ones are 'train', 'val' and 'test'.")

    dataloader_args['pin_memory'] = dataset_opt.get('pin_memory', False)

    prefetch_mode = dataset_opt.get('prefetch_mode')
    if prefetch_mode == 'cpu':  # CPUPrefetcher
        num_prefetch_queue = dataset_opt.get('num_prefetch_queue', 1)
        logger = get_root_logger()
        logger.info(f'Use {prefetch_mode} prefetch dataloader: '
                    f'num_prefetch_queue = {num_prefetch_queue}')
        return PrefetchDataLoader(num_prefetch_queue=num_prefetch_queue,
                                  **dataloader_args)
    else:
        # prefetch_mode=None: Normal dataloader
        # prefetch_mode='cuda': dataloader for CUDAPrefetcher
        return torch.utils.data.DataLoader(**dataloader_args)
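The worker_init_fn referenced above is not shown in this section. A common implementation, given here as a sketch consistent with the partial(worker_init_fn, num_workers=..., rank=..., seed=...) call but not necessarily identical to the one used in this codebase, derives a distinct, reproducible seed for each dataloader worker from the base seed, the process rank, and the worker id:

import random

import numpy as np


def worker_init_fn(worker_id, num_workers, rank, seed):
    # DataLoader passes only worker_id; the other arguments are bound via partial().
    worker_seed = num_workers * rank + worker_id + seed
    np.random.seed(worker_seed)
    random.seed(worker_seed)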