def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    cfg.model.pretrained = None  # ensure to use checkpoint rather than pretraining

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        traverse_replace(cfg, 'memcached', False)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        if args.launcher == 'slurm':
            cfg.dist_params['port'] = args.port
        init_dist(args.launcher, **cfg.dist_params)

    # logger
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'test_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=cfg.data.imgs_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader)  # dict{key: np.ndarray}

    rank, _ = get_dist_info()
    if rank == 0:
        for name, val in outputs.items():
            dataset.evaluate(
                torch.from_numpy(val), name, logger, topk=(1, 5))
def get_model(model_cfg_path=MOCO_CFG_PATH, ckpt_path=MOCO_MODEL_PATH):
    model = build_model(Config.fromfile(model_cfg_path).model)
    model_dict = torch.load(ckpt_path)
    model.load_state_dict(model_dict['state_dict'])
    # keep only the query encoder and freeze it for use as a fixed feature extractor
    model = model.encoder_q.cuda()
    for param in model.parameters():
        param.requires_grad = False
    model.eval()
    return model
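# Usage sketch, assuming MOCO_CFG_PATH and MOCO_MODEL_PATH point to a valid MoCo
# config and checkpoint: the returned module is the frozen query encoder in eval
# mode, so it can be called directly on a batch of images.
#
#   encoder = get_model()
#   with torch.no_grad():
#       feats = encoder(images.cuda())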
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)

    model = build_model(cfg.model)

    num_params = sum(p.numel() for p in model.parameters()) / 1000000.
    num_grad_params = sum(p.numel() for p in model.parameters()
                          if p.requires_grad) / 1000000.
    num_backbone_params = sum(
        p.numel() for p in model.backbone.parameters()) / 1000000.
    num_backbone_grad_params = sum(
        p.numel() for p in model.backbone.parameters()
        if p.requires_grad) / 1000000.

    print("Number of backbone parameters: {:.5g} M".format(
        num_backbone_params))
    print("Number of backbone parameters requiring grad: {:.5g} M".format(
        num_backbone_grad_params))
    print("Number of total parameters: {:.5g} M".format(num_params))
    print("Number of total parameters requiring grad: {:.5g} M".format(
        num_grad_params))
def init_model(config, checkpoint=None, device='cuda:0'):
    """Initialize a model from config file.

    Args:
        config (str or :obj:`mmcv.Config`): Config file path or the config
            object.
        checkpoint (str, optional): Checkpoint path. If left as None, the
            model will not load any weights.
        device (str): Device to place the model on. Defaults to 'cuda:0'.

    Returns:
        nn.Module: The constructed model.
    """
    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)
    elif not isinstance(config, mmcv.Config):
        raise TypeError('config must be a filename or Config object, '
                        'but got {}'.format(type(config)))
    config.model.pretrained = None
    model = build_model(config.model)
    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint)
    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model
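# Usage sketch for init_model with hypothetical paths (the config and checkpoint
# below are placeholders, and the `backbone` attribute is an assumption about the
# built model, not guaranteed by this script):
#
#   model = init_model('configs/selfsup/moco/r50_v2.py',
#                      checkpoint='work_dirs/moco/latest.pth',
#                      device='cuda:0')
#   with torch.no_grad():
#       out = model.backbone(imgs)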
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    layer_ind = [int(idx) for idx in args.layer_ind.split(',')]
    cfg.model.backbone.out_indices = layer_ind

    # checkpoint and pretrained are exclusive
    assert cfg.model.pretrained == "random" or args.checkpoint is None, \
        "Checkpoint and pretrained are exclusive."

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        for field in ['train', 'val', 'test']:
            if hasattr(cfg.data, field):
                getattr(cfg.data, field).data_source.memcached = False

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # logger
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'extract_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # build the dataloader
    dataset_cfg = mmcv.Config.fromfile(args.dataset_config)
    dataset = build_dataset(dataset_cfg.data.extract)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=dataset_cfg.data.imgs_per_gpu,
        workers_per_gpu=dataset_cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model)
    if args.checkpoint is not None:
        logger.info("Use checkpoint: {} to extract features".format(
            args.checkpoint))
        load_checkpoint(model, args.checkpoint, map_location='cpu')
    elif args.pretrained != "random":
        logger.info('Use pretrained model: {} to extract features'.format(
            args.pretrained))
    else:
        logger.info('No checkpoint or pretrained model is given; '
                    'using random initialization.')

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)

    # build extraction processor
    extractor = ExtractProcess(
        pool_type='specified', backbone='resnet50', layer_indices=layer_ind)

    # run
    outputs = extractor.extract(model, data_loader, distributed=distributed)
    rank, _ = get_dist_info()
    mmcv.mkdir_or_exist("{}/features/".format(args.work_dir))
    if rank == 0:
        for key, val in outputs.items():
            split_num = len(dataset_cfg.split_name)
            split_at = dataset_cfg.split_at
            for ss in range(split_num):
                output_file = "{}/features/{}_{}.npy".format(
                    args.work_dir, dataset_cfg.split_name[ss], key)
                if ss == 0:
                    np.save(output_file, val[:split_at[0]])
                elif ss == split_num - 1:
                    np.save(output_file, val[split_at[-1]:])
                else:
                    np.save(output_file, val[split_at[ss - 1]:split_at[ss]])
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    # optional CLI overrides for training / validation batch sizes
    if args.imgs_per_gpu is not None:
        cfg.data.imgs_per_gpu = args.imgs_per_gpu
    if args.val_imgs_per_gpu is not None:
        cfg.custom_hooks[0].imgs_per_gpu = args.val_imgs_per_gpu

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        traverse_replace(cfg, 'memcached', False)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        assert cfg.model.type not in \
            ['DeepCluster', 'MOCO', 'SimCLR', 'ODC', 'NPID'], \
            "{} does not support non-dist training.".format(cfg.model.type)
    else:
        distributed = True
        if args.launcher == 'slurm':
            cfg.dist_params['port'] = args.port
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'train_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([('{}: {}'.format(k, v))
                          for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config:\n{}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    if args.pretrained is not None:
        assert isinstance(args.pretrained, str)
        cfg.model.pretrained = args.pretrained
    model = build_model(cfg.model)

    datasets = [build_dataset(cfg.data.train)]
    assert len(cfg.workflow) == 1, "Validation is called by hook."
    if cfg.checkpoint_config is not None:
        # save openselfsup version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            openselfsup_version=__version__, config=cfg.text)
    # add an attribute for visualization convenience
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        timestamp=timestamp,
        meta=meta)
def main(): print(f"Using num gpus: {torch.cuda.device_count()}") args = parse_args() cfg = Config.fromfile(args.config) if args.local_rank == 0: wandb.init(config=cfg.model) wandb.config.update(cfg.data) wandb.config.update( {"pipelines": ','.join([p.type for p in cfg.data.train.pipeline])}) wandb.config.update({"epochs": cfg.total_epochs}) wandb.config.update({"dataset_size": cfg.dataset_size}) # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True # update configs according to CLI args if wandb.run is not None: # save to wandb run dir for tracking and saving the models cfg.work_dir = wandb.run.dir elif args.work_dir is not None: cfg.work_dir = args.work_dir if args.resume_from is not None: cfg.resume_from = args.resume_from cfg.gpus = args.gpus # check memcached package exists if importlib.util.find_spec('mc') is None: traverse_replace(cfg, 'memcached', False) # init distributed env first, since logger depends on the dist info. if args.launcher == 'none': distributed = False if not args.debug: assert cfg.model.type not in \ ['DeepCluster', 'MOCO', 'SimCLR', 'ODC', 'NPID'], \ "{} does not support non-dist training unless debugging (use --debug flag).".format( cfg.model.type) else: distributed = True if args.launcher == 'slurm': cfg.dist_params['port'] = args.port init_dist(args.launcher, **cfg.dist_params) # create work_dir mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) # init the logger before other steps timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) log_file = osp.join(cfg.work_dir, 'train_{}.log'.format(timestamp)) logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) # init the meta dict to record some important information such as # environment info and seed, which will be logged meta = dict() # log env info env_info_dict = collect_env() env_info = '\n'.join([('{}: {}'.format(k, v)) for k, v in env_info_dict.items()]) dash_line = '-' * 60 + '\n' logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line) meta['env_info'] = env_info # log some basic info logger.info('Distributed training: {}'.format(distributed)) logger.info('Config:\n{}'.format(cfg.text)) # set random seeds if args.seed is not None: logger.info('Set random seed to {}, deterministic: {}'.format( args.seed, args.deterministic)) set_random_seed(args.seed, deterministic=args.deterministic) cfg.seed = args.seed meta['seed'] = args.seed if args.pretrained is not None: assert isinstance(args.pretrained, str) cfg.model.pretrained = args.pretrained model = build_model(cfg.model) if args.local_rank == 0: print(model) if args.debug: logger.info( "DEBUGGING enabled, setting batch size to 64 to allow 1 gpu debugging" ) cfg.data.batch_size = 64 model.set_debug() datasets = [build_dataset(cfg.data.train)] assert len(cfg.workflow) == 1, "Validation is called by hook." if cfg.checkpoint_config is not None: # save openselfsup version, config file content and class names in # checkpoints as meta data cfg.checkpoint_config.meta = dict(openselfsup_version=__version__, config=cfg.text) if args.local_rank == 0: wandb.watch(model) # add an attribute for visualization convenience train_model(model, datasets, cfg, distributed=distributed, timestamp=timestamp, meta=meta, debug=args.debug)
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    cfg.model.pretrained = None  # ensure to use checkpoint rather than pretraining

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        traverse_replace(cfg, 'memcached', False)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        if args.launcher == 'slurm':
            cfg.dist_params['port'] = args.port
        init_dist(args.launcher, **cfg.dist_params)

    # logger
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'test_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=cfg.data.imgs_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model)

    activations = defaultdict(list)
    # idea from gist.github.com/Tushar-N/680633ec18f5cb4b47933da7d10902af
    if args.layer_type == nn.Linear:
        # can save all activations
        def save_activation(name, mod, inp, out):
            activations[name].append(out.cpu())
    else:
        def save_activation(name, mod, inp, out):
            activations[name] = [out.cpu()]

    load_checkpoint(model, args.checkpoint, map_location='cpu')
    for name, m in model.named_modules():
        if type(m) == args.layer_type:
            m.register_forward_hook(partial(save_activation, name))

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        raise NotImplementedError(
            "Distributed Data Parallel does not register hooks.")
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader)  # dict{key: np.ndarray}

    activations = {
        name: torch.cat(outputs, 0)
        for name, outputs in activations.items()
    }
    act_file = osp.join(cfg.work_dir, "model_acts")
    np.savez(act_file, **activations)
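# np.savez appends the .npz suffix to the given path, so the saved activations
# can later be reloaded for analysis, e.g. (illustrative sketch only):
#
#   acts = np.load(osp.join(cfg.work_dir, 'model_acts.npz'))
#   for name in acts.files:
#       print(name, acts[name].shape)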