def __init__(self,
             dataset,
             bn_dataset,
             interval,
             optimizer_cfg,
             lr_cfg,
             dist_mode=True,
             initial=True,
             resume_best_path='',
             epoch_per_stage=None,
             **eval_kwargs):
    from openselfsup import datasets
    if isinstance(dataset, Dataset) and isinstance(bn_dataset, Dataset):
        self.dataset = dataset
        self.bn_dataset = bn_dataset
    elif isinstance(dataset, dict) and isinstance(bn_dataset, dict):
        self.dataset = datasets.build_dataset(dataset)
        self.bn_dataset = datasets.build_dataset(bn_dataset)
    else:
        raise TypeError(
            'dataset and bn_dataset must both be Dataset objects or dicts, '
            'not {} and {}'.format(type(dataset), type(bn_dataset)))
    self.data_loader = datasets.build_dataloader(
        self.dataset,
        eval_kwargs['imgs_per_gpu'],
        eval_kwargs['workers_per_gpu'],
        dist=dist_mode,
        shuffle=False,
        prefetch=eval_kwargs.get('prefetch', False),
        img_norm_cfg=eval_kwargs.get('img_norm_cfg', dict()))
    self.bn_data_loader = datasets.build_dataloader(
        self.bn_dataset,
        eval_kwargs['imgs_per_gpu'],
        eval_kwargs['workers_per_gpu'],
        dist=dist_mode,
        shuffle=True,
        prefetch=eval_kwargs.get('prefetch', False),
        img_norm_cfg=eval_kwargs.get('img_norm_cfg', dict()))
    # Cache a single fixed batch for BN recalibration, then free the loader.
    self.bn_data = next(iter(self.bn_data_loader))['img']
    del self.bn_data_loader
    self.dist_mode = dist_mode
    self.initial = initial
    self.interval = interval
    self.optimizer_cfg = optimizer_cfg
    self.lr_cfg = lr_cfg
    self.eval_kwargs = eval_kwargs
    self.epoch_per_stage = (
        epoch_per_stage if epoch_per_stage is not None else interval)
    if resume_best_path:
        with open(resume_best_path, 'r') as f:
            # safe_load avoids executing arbitrary YAML tags; bare
            # yaml.load without a Loader argument is deprecated.
            self.loaded_best_path = yaml.safe_load(f)
    else:
        self.loaded_best_path = []
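# A minimal, self-contained sketch of the "cache one batch, drop the loader"
# pattern used above, with a plain PyTorch DataLoader. TensorDataset stands
# in for the real bn_dataset, and the shapes are illustrative only.
import torch
from torch.utils.data import DataLoader, TensorDataset

ds = TensorDataset(torch.randn(32, 3, 8, 8))
loader = DataLoader(ds, batch_size=8, shuffle=True)
bn_batch = next(iter(loader))[0]  # keep one fixed batch, e.g. for BN stats
del loader                        # the iterator and its workers can be freed
print(bn_batch.shape)             # torch.Size([8, 3, 8, 8])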
def __init__(self, dataset, dist_mode=True, initial=True, interval=1,
             **eval_kwargs):
    from openselfsup import datasets
    if isinstance(dataset, Dataset):
        self.dataset = dataset
    elif isinstance(dataset, dict):
        self.dataset = datasets.build_dataset(dataset)
    else:
        raise TypeError(
            'dataset must be a Dataset object or a dict, not {}'.format(
                type(dataset)))
    self.data_loader = datasets.build_dataloader(
        self.dataset,
        eval_kwargs['imgs_per_gpu'],
        eval_kwargs['workers_per_gpu'],
        dist=dist_mode,
        shuffle=False)
    self.dist_mode = dist_mode
    self.initial = initial
    self.interval = interval
    self.eval_kwargs = eval_kwargs
def __init__(self, dataset, dist_mode=True, initial=True, interval=1,
             **eval_kwargs):
    from openselfsup import datasets
    if isinstance(dataset, Dataset):
        self.dataset = dataset
    elif isinstance(dataset, dict):
        self.dataset = datasets.build_dataset(dataset)
    else:
        raise TypeError(
            'dataset must be a Dataset object or a dict, not {}'.format(
                type(dataset)))
    self.run_after_epoch = eval_kwargs.get('by_epoch', True)
    self.val_name = eval_kwargs.get('name', 'unnamed-val-hook')
    self.data_loader = datasets.build_dataloader(
        self.dataset,
        eval_kwargs['imgs_per_gpu'],
        eval_kwargs['workers_per_gpu'],
        dist=dist_mode,
        shuffle=False,
        prefetch=eval_kwargs.get('prefetch', False),
        img_norm_cfg=eval_kwargs.get('img_norm_cfg', dict()))
    self.dist_mode = dist_mode
    self.initial = initial
    self.interval = interval
    self.eval_kwargs = eval_kwargs
def main():
    args = parse_args()
    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    cfg.model.pretrained = None  # ensure to use checkpoint rather than pretraining

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        traverse_replace(cfg, 'memcached', False)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        if args.launcher == 'slurm':
            cfg.dist_params['port'] = args.port
        init_dist(args.launcher, **cfg.dist_params)

    # logger
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'test_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=cfg.data.imgs_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader)  # dict{key: np.ndarray}

    rank, _ = get_dist_info()
    if rank == 0:
        for name, val in outputs.items():
            dataset.evaluate(torch.from_numpy(val), name, logger, topk=(1, 5))
from mmcv import Config
from torch.utils.data import DataLoader, RandomSampler

from openselfsup.datasets import build_dataset


def get_data_loader_from_cfg(cfg_path=LINEAR_CFG_PATH):
    cfg = Config.fromfile(cfg_path)
    cfg.data.train.data_source.memcached = False
    dataset = build_dataset(cfg.data.train)
    data_loader = DataLoader(
        dataset,
        batch_size=50,
        sampler=RandomSampler(dataset),
        pin_memory=False)
    return data_loader
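# Hedged usage sketch for get_data_loader_from_cfg. LINEAR_CFG_PATH is
# assumed to point at an existing OpenSelfSup-style config; the 'img' key
# matches the dict batches the repo's datasets return elsewhere in this
# section.
loader = get_data_loader_from_cfg()
batch = next(iter(loader))
print(batch['img'].shape)  # (50, C, H, W) given batch_size=50 above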
def __init__(self, train_dataset, val_dataset, dist_mode=True, **eval_kwargs):
    from openselfsup import datasets
    if isinstance(train_dataset, Dataset):
        self.train_dataset = train_dataset
    elif isinstance(train_dataset, dict):
        self.train_dataset = datasets.build_dataset(train_dataset)
    else:
        raise TypeError(
            'train_dataset must be a Dataset object or a dict, not {}'.format(
                type(train_dataset)))
    self.train_data_loader = datasets.build_dataloader(
        self.train_dataset,
        eval_kwargs['imgs_per_gpu'],
        eval_kwargs['workers_per_gpu'],
        dist=dist_mode,
        shuffle=False)
    if isinstance(val_dataset, Dataset):
        self.val_dataset = val_dataset
    elif isinstance(val_dataset, dict):
        self.val_dataset = datasets.build_dataset(val_dataset)
    else:
        raise TypeError(
            'val_dataset must be a Dataset object or a dict, not {}'.format(
                type(val_dataset)))
    self.val_data_loader = datasets.build_dataloader(
        self.val_dataset,
        eval_kwargs['imgs_per_gpu'],
        eval_kwargs['workers_per_gpu'],
        dist=dist_mode,
        shuffle=False)
    self.dist_mode = dist_mode
    self.eval_kwargs = eval_kwargs
    self.lookup = None
    if eval_kwargs.get('lookup') is not None:
        self.lookup = torch.load(eval_kwargs['lookup'])
def __init__(self, dataset, imgs_per_gpu, workers_per_gpu, dist_mode=False):
    from openselfsup import datasets
    if isinstance(dataset, Dataset):
        self.dataset = dataset
    elif isinstance(dataset, dict):
        self.dataset = datasets.build_dataset(dataset)
    else:
        raise TypeError(
            'dataset must be a Dataset object or a dict, not {}'.format(
                type(dataset)))
    self.data_loader = datasets.build_dataloader(
        self.dataset,
        imgs_per_gpu,
        workers_per_gpu,
        dist=dist_mode,
        shuffle=False)
    self.dist_mode = dist_mode
    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
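# Self-contained sketch of what the AdaptiveAvgPool2d((1, 1)) member is for:
# collapsing an (N, C, H, W) backbone feature map to (N, C) features before
# evaluation. The shapes below are illustrative (e.g. a ResNet stage output).
import torch
import torch.nn as nn

avg_pool = nn.AdaptiveAvgPool2d((1, 1))
feat = torch.randn(4, 512, 7, 7)
pooled = avg_pool(feat).view(feat.size(0), -1)
print(pooled.shape)  # torch.Size([4, 512])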
def main():
    args = parse_args()
    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    layer_ind = [int(idx) for idx in args.layer_ind.split(',')]
    cfg.model.backbone.out_indices = layer_ind

    # checkpoint and pretrained are exclusive
    assert cfg.model.pretrained == "random" or args.checkpoint is None, \
        "Checkpoint and pretrained are exclusive."

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        for field in ['train', 'val', 'test']:
            if hasattr(cfg.data, field):
                getattr(cfg.data, field).data_source.memcached = False

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # logger
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'extract_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # build the dataloader
    dataset_cfg = mmcv.Config.fromfile(args.dataset_config)
    dataset = build_dataset(dataset_cfg.data.extract)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=dataset_cfg.data.imgs_per_gpu,
        workers_per_gpu=dataset_cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model)
    if args.checkpoint is not None:
        logger.info("Use checkpoint: {} to extract features".format(
            args.checkpoint))
        load_checkpoint(model, args.checkpoint, map_location='cpu')
    elif args.pretrained != "random":
        logger.info("Use pretrained model: {} to extract features".format(
            args.pretrained))
    else:
        logger.info(
            "No checkpoint or pretrained model is given; using random init.")
    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)

    # build extraction processor
    extractor = ExtractProcess(
        pool_type='specified', backbone='resnet50', layer_indices=layer_ind)

    # run
    outputs = extractor.extract(model, data_loader, distributed=distributed)
    rank, _ = get_dist_info()
    mmcv.mkdir_or_exist("{}/features/".format(args.work_dir))
    if rank == 0:
        # Slice each feature array at the split_at boundaries and save one
        # .npy file per named split.
        for key, val in outputs.items():
            split_num = len(dataset_cfg.split_name)
            split_at = dataset_cfg.split_at
            for ss in range(split_num):
                output_file = "{}/features/{}_{}.npy".format(
                    args.work_dir, dataset_cfg.split_name[ss], key)
                if ss == 0:
                    np.save(output_file, val[:split_at[0]])
                elif ss == split_num - 1:
                    np.save(output_file, val[split_at[-1]:])
                else:
                    np.save(output_file, val[split_at[ss - 1]:split_at[ss]])
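# Self-contained sketch of the split-and-save loop above: slicing the feature
# array at the split_at boundaries is equivalent to numpy.split. The sizes
# and split names here are illustrative only.
import numpy as np

features = np.arange(10 * 4).reshape(10, 4)  # stand-in for extracted features
split_at = [6, 8]                            # boundaries between named splits
parts = np.split(features, split_at)         # pieces of length 6, 2 and 2
for name, part in zip(['train', 'val', 'test'], parts):
    print(name, part.shape)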
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    # optional CLI overrides for train/val batch sizes
    if args.imgs_per_gpu is not None:
        cfg.data.imgs_per_gpu = args.imgs_per_gpu
    if args.val_imgs_per_gpu is not None:
        cfg.custom_hooks[0].imgs_per_gpu = args.val_imgs_per_gpu

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        traverse_replace(cfg, 'memcached', False)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        assert cfg.model.type not in \
            ['DeepCluster', 'MOCO', 'SimCLR', 'ODC', 'NPID'], \
            "{} does not support non-dist training.".format(cfg.model.type)
    else:
        distributed = True
        if args.launcher == 'slurm':
            cfg.dist_params['port'] = args.port
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'train_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join(
        ['{}: {}'.format(k, v) for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config:\n{}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    if args.pretrained is not None:
        assert isinstance(args.pretrained, str)
        cfg.model.pretrained = args.pretrained
    model = build_model(cfg.model)

    datasets = [build_dataset(cfg.data.train)]
    assert len(cfg.workflow) == 1, "Validation is called by hook."
    if cfg.checkpoint_config is not None:
        # save openselfsup version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            openselfsup_version=__version__, config=cfg.text)
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        timestamp=timestamp,
        meta=meta)
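# For reference, a typical set_random_seed implementation in this style of
# codebase seeds Python, NumPy, and all CUDA RNGs, and optionally trades
# cuDNN autotuning for determinism. This is a hedged sketch, not necessarily
# the exact function imported above.
import random

import numpy as np
import torch


def set_random_seed_sketch(seed, deterministic=False):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        # reproducible kernels, at the cost of the cudnn autotuner
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False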
def main():
    print(f"Using num gpus: {torch.cuda.device_count()}")
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.local_rank == 0:
        wandb.init(config=cfg.model)
        wandb.config.update(cfg.data)
        wandb.config.update(
            {"pipelines": ','.join([p.type for p in cfg.data.train.pipeline])})
        wandb.config.update({"epochs": cfg.total_epochs})
        wandb.config.update({"dataset_size": cfg.dataset_size})

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if wandb.run is not None:
        # save to wandb run dir for tracking and saving the models
        cfg.work_dir = wandb.run.dir
    elif args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        traverse_replace(cfg, 'memcached', False)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        if not args.debug:
            assert cfg.model.type not in \
                ['DeepCluster', 'MOCO', 'SimCLR', 'ODC', 'NPID'], \
                "{} does not support non-dist training unless debugging " \
                "(use --debug flag).".format(cfg.model.type)
    else:
        distributed = True
        if args.launcher == 'slurm':
            cfg.dist_params['port'] = args.port
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'train_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join(
        ['{}: {}'.format(k, v) for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config:\n{}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    if args.pretrained is not None:
        assert isinstance(args.pretrained, str)
        cfg.model.pretrained = args.pretrained
    model = build_model(cfg.model)
    if args.local_rank == 0:
        print(model)

    if args.debug:
        logger.info(
            "DEBUGGING enabled, setting batch size to 64 to allow "
            "1 gpu debugging")
        cfg.data.batch_size = 64
        model.set_debug()

    datasets = [build_dataset(cfg.data.train)]
    assert len(cfg.workflow) == 1, "Validation is called by hook."
    if cfg.checkpoint_config is not None:
        # save openselfsup version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            openselfsup_version=__version__, config=cfg.text)
    if args.local_rank == 0:
        wandb.watch(model)
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        timestamp=timestamp,
        meta=meta,
        debug=args.debug)
def main():
    args = parse_args()
    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    cfg.model.pretrained = None  # ensure to use checkpoint rather than pretraining

    # check memcached package exists
    if importlib.util.find_spec('mc') is None:
        traverse_replace(cfg, 'memcached', False)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        if args.launcher == 'slurm':
            cfg.dist_params['port'] = args.port
        init_dist(args.launcher, **cfg.dist_params)

    # logger
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'test_{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=cfg.data.imgs_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model)

    # idea from gist.github.com/Tushar-N/680633ec18f5cb4b47933da7d10902af
    activations = defaultdict(list)
    if args.layer_type == nn.Linear:
        # linear outputs are small enough to save for every batch
        def save_activation(name, mod, inp, out):
            activations[name].append(out.cpu())
    else:
        # otherwise keep only the most recent batch per layer
        def save_activation(name, mod, inp, out):
            activations[name] = [out.cpu()]

    load_checkpoint(model, args.checkpoint, map_location='cpu')
    for name, m in model.named_modules():
        if type(m) == args.layer_type:
            m.register_forward_hook(partial(save_activation, name))

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        # MMDistributedDataParallel wraps the module, so hooks registered on
        # the bare model are not called through the DDP forward path.
        raise NotImplementedError(
            "Distributed Data Parallel does not register hooks.")

    activations = {
        name: torch.cat(outs, 0) for name, outs in activations.items()
    }
    act_file = osp.join(cfg.work_dir, "model_acts")
    np.savez(act_file, **activations)
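# Minimal runnable sketch of the forward-hook pattern above (the linked gist
# idea): functools.partial binds each module's name into a shared callback,
# so one function can collect activations per layer. All names below are
# illustrative, not from the repo.
from collections import defaultdict
from functools import partial

import torch
import torch.nn as nn

acts = defaultdict(list)


def save_activation(name, mod, inp, out):
    # detach so the stored tensors do not keep the autograd graph alive
    acts[name].append(out.detach().cpu())


net = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
for name, m in net.named_modules():
    if isinstance(m, nn.Linear):
        m.register_forward_hook(partial(save_activation, name))

net(torch.randn(3, 8))
print({k: torch.cat(v, 0).shape for k, v in acts.items()})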