import tempfile

from mmcv import Config

# `train_model` is this project's training API (in MMAction2:
# `mmaction.apis.train_model`). `ExampleModel` and `ExampleDataset`
# are fixtures defined in the test module.
from mmaction.apis import train_model


def test_train_model():
    model = ExampleModel()
    dataset = ExampleDataset()
    datasets = [ExampleDataset(), ExampleDataset()]
    cfg = dict(
        seed=0,
        gpus=1,
        gpu_ids=[0],
        resume_from=None,
        load_from=None,
        workflow=[('train', 1)],
        total_epochs=5,
        evaluation=dict(interval=1, key_indicator='acc'),
        data=dict(
            videos_per_gpu=1,
            workers_per_gpu=0,
            val=dict(type='ExampleDataset')),
        optimizer=dict(
            type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
        optimizer_config=dict(grad_clip=dict(max_norm=40, norm_type=2)),
        lr_config=dict(policy='step', step=[40, 80]),
        omnisource=False,
        precise_bn=False,
        checkpoint_config=dict(interval=1),
        log_level='INFO',
        log_config=dict(interval=20, hooks=[dict(type='TextLoggerHook')]))

    with tempfile.TemporaryDirectory() as tmpdir:
        # normal train
        cfg['work_dir'] = tmpdir
        config = Config(cfg)
        train_model(model, dataset, config)

    with tempfile.TemporaryDirectory() as tmpdir:
        # train with validation
        cfg['work_dir'] = tmpdir
        config = Config(cfg)
        train_model(model, dataset, config, validate=True)

    with tempfile.TemporaryDirectory() as tmpdir:
        # train with Fp16OptimizerHook
        cfg['work_dir'] = tmpdir
        cfg['fp16'] = dict(loss_scale=512.)
        config = Config(cfg)
        model.fp16_enabled = None
        train_model(model, dataset, config)

    with tempfile.TemporaryDirectory() as tmpdir:
        # train with omnisource on (expects a list of datasets)
        cfg['work_dir'] = tmpdir
        cfg['omnisource'] = True
        config = Config(cfg)
        train_model(model, datasets, config)

    with tempfile.TemporaryDirectory() as tmpdir:
        # train with precise_bn on
        cfg['work_dir'] = tmpdir
        cfg['precise_bn'] = dict(num_iters=1, interval=1)
        config = Config(cfg)
        train_model(model, datasets, config)
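# `ExampleModel` and `ExampleDataset` above are fixtures defined elsewhere in
# the test module. A minimal sketch of what such fixtures typically look like
# (illustrative only; the real definitions may differ):
import torch
import torch.nn as nn
from torch.utils.data import Dataset


class ExampleModel(nn.Module):
    """Tiny model exposing the `train_step` interface the runner expects."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, imgs):
        return self.linear(imgs)

    def train_step(self, data_batch, optimizer, **kwargs):
        loss = self(data_batch['imgs']).sum()
        return dict(loss=loss, log_vars=dict(loss=loss.item()), num_samples=1)


class ExampleDataset(Dataset):
    """Tiny dataset exposing the `evaluate` hook used during validation."""

    def __init__(self, test_mode=False):
        self.test_mode = test_mode

    def evaluate(self, results, logger=None):
        # dummy metric matching `key_indicator='acc'` in the test config
        return dict(acc=1.0)

    def __getitem__(self, idx):
        return dict(imgs=torch.tensor([1.0]))

    def __len__(self):
        return 1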
def Train(self):
    self.setup()

    print("Starting to train ...")
    train_model(
        self.system_dict["local"]["model"],
        self.system_dict["local"]["datasets"],
        self.system_dict["local"]["cfg"],
        distributed=self.system_dict["params"]["distributed"],
        validate=self.system_dict["params"]["val_dataset"])
    print("Done")

    print("Creating inference config file")
    cfg_infer = Config.fromfile(
        self.system_dict["params"]["inference_config_file"])
    cfg_infer.model.cls_head.num_classes = \
        self.system_dict["params"]["num_classes"]
    cfg_infer.dump(self.system_dict["local"]["cfg"].work_dir + "/config.py")
    print("Done")
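# Hypothetical usage of the wrapper above (the class and setter names are
# assumptions for illustration; consult the surrounding project's API for
# the real ones):
#
#   gtf = VideoClassifierPrototype()
#   gtf.Dataset_Params(...)   # point at the training data
#   gtf.Model_Params(...)     # pick a backbone / config
#   gtf.Train()               # runs setup() and train_model() as above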
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    cfg.merge_from_dict(args.cfg_options)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # CLI > config file > default (base filename)
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # The flag is used to determine whether it is omnisource training
    cfg.setdefault('omnisource', False)

    # The flag is used to register module's hooks
    cfg.setdefault('module_hooks', [])

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['config_name'] = osp.basename(args.config)
    meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

    model = build_model(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))

    register_module_hooks(model.backbone, cfg.module_hooks)

    if cfg.omnisource:
        # If the omnisource flag is set, cfg.data.train should be a list
        assert isinstance(cfg.data.train, list)
        datasets = [build_dataset(dataset) for dataset in cfg.data.train]
    else:
        datasets = [build_dataset(cfg.data.train)]

    if len(cfg.workflow) == 2:
        # For simplicity, omnisource is not compatible with the val workflow;
        # we recommend using `--validate` instead.
        assert not cfg.omnisource

        if args.validate:
            warnings.warn('val workflow is duplicated with `--validate`, '
                          'it is recommended to use `--validate`. see '
                          'https://github.com/open-mmlab/mmaction2/pull/123')

        val_dataset = copy.deepcopy(cfg.data.val)
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmaction version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmaction_version=__version__ + get_git_hash(digits=7),
            config=cfg.text)

    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        timestamp=timestamp,
        meta=meta)
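# Typical invocation of this entry point (the config path is a placeholder;
# flag spellings are assumed from the `args.*` attributes read above):
#
#   python tools/train.py <config.py> --validate --seed 0 --deterministic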
def main():
    # parse arguments
    args = parse_args()

    # load config
    cfg = Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)
    cfg = propagate_root_dir(cfg, args.data_dir)

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()

    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        logger.info(f'NNCF config: {cfg.nncf_config}')
        meta.update(get_nncf_metadata())

    # set random seeds
    cfg.seed = args.seed
    meta['seed'] = args.seed
    if cfg.get('seed'):
        logger.info(f'Set random seed to {cfg.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(cfg.seed, deterministic=args.deterministic)

    # build datasets
    datasets = [build_dataset(cfg.data, 'train', dict(logger=logger))]
    logger.info(f'Train datasets:\n{str(datasets[0])}')

    if len(cfg.workflow) == 2:
        if not args.no_validate:
            warnings.warn('val workflow is duplicated with `--validate`, '
                          'it is recommended to use `--validate`. see '
                          'https://github.com/open-mmlab/mmaction2/pull/123')
        datasets.append(
            build_dataset(copy.deepcopy(cfg.data), 'val', dict(logger=logger)))
        logger.info(f'Val datasets:\n{str(datasets[-1])}')

    # filter dataset labels
    if cfg.get('classes'):
        datasets = [dataset.filter(cfg.classes) for dataset in datasets]

    # build model
    model = build_model(
        cfg.model,
        train_cfg=cfg.train_cfg,
        test_cfg=cfg.test_cfg,
        class_sizes=datasets[0].class_sizes,
        class_maps=datasets[0].class_maps)

    # define ignore layers
    ignore_prefixes = []
    if hasattr(cfg, 'reset_layer_prefixes') and \
            isinstance(cfg.reset_layer_prefixes, (list, tuple)):
        ignore_prefixes += cfg.reset_layer_prefixes
    ignore_suffixes = ['num_batches_tracked']
    if hasattr(cfg, 'reset_layer_suffixes') and \
            isinstance(cfg.reset_layer_suffixes, (list, tuple)):
        ignore_suffixes += cfg.reset_layer_suffixes

    # train model
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta,
        ignore_prefixes=tuple(ignore_prefixes),
        ignore_suffixes=tuple(ignore_suffixes))
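# Sketch of how the `ignore_prefixes` / `ignore_suffixes` computed above are
# typically consumed when loading pretrained weights (illustrative only; the
# real filtering happens inside this project's `train_model` / checkpoint
# loading code):
def _should_skip_param(name, ignore_prefixes, ignore_suffixes):
    """Return True if a checkpoint entry should not be loaded into the model."""
    return (name.startswith(tuple(ignore_prefixes))
            or name.endswith(tuple(ignore_suffixes)))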
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # CLI > config file > default (base filename)
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_model(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmaction version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmaction_version=__version__, config=cfg.text)

    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        timestamp=timestamp,
        meta=meta)
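# Worked example of the work_dir priority above (flag spelling assumed from
# `args.work_dir`; the config name is hypothetical): with `--work-dir runs/exp1`
# on the CLI, cfg.work_dir becomes 'runs/exp1'; with no CLI flag and no
# `work_dir` key in the config, a config named `tsn_r50.py` would train into
# './work_dirs/tsn_r50'.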
# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU, and by a further 16
# to compensate for the smaller batch size (linear scaling rule).
cfg.optimizer.lr = cfg.optimizer.lr / 8 / 16
cfg.total_epochs = 15

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10
# We can set the log printing interval to reduce the number of log messages
cfg.log_config.interval = 5

# Set seed so that the results are reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

# Have a look at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the recognizer
model = build_model(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

# Create the work_dir and start training
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_model(model, datasets, cfg, distributed=False, validate=True)
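# Worked example of the LR scaling above (the base value is an assumption for
# illustration): if the 8-GPU config sets optimizer.lr = 0.01, the single-GPU,
# small-batch run trains with 0.01 / 8 / 16 = 7.8125e-05.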