def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    if args.aml:
        data_store = os.environ['AZUREML_DATAREFERENCE_{}'.format(
            args.aml_data_store)]
        parse(cfg, data_store)
        if cfg.resume_from is not None:
            cfg.resume_from = os.path.join(data_store,
                                           args.aml_work_dir_prefix,
                                           cfg.resume_from)
        cfg.work_dir = os.path.join(data_store, args.aml_work_dir_prefix,
                                    cfg.work_dir)
        print('work_dir: ', cfg.work_dir)
        if 'data' in cfg.model.pretrained:
            cfg.model.pretrained = os.path.join(data_store,
                                                cfg.model.pretrained)

    # if not args.aml:
    #     # work_dir is determined in this priority:
    #     # CLI > segment in file > filename
    #     if args.work_dir is not None:
    #         # update configs according to CLI args if args.work_dir is not None
    #         cfg.work_dir = args.work_dir
    #     elif cfg.get('work_dir', None) is None:
    #         # use config filename as default work_dir if cfg.work_dir is None
    #         cfg.work_dir = osp.join('./work_dirs',
    #                                 osp.splitext(osp.basename(args.config))[0])

    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # update gpu num
    if dist.is_initialized():
        cfg.gpus = dist.get_world_size()
    else:
        cfg.gpus = args.gpus

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer[
            'lr'] * cfg.gpus / 8 * cfg.data.samples_per_gpu / 32

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_classifier(cfg.model)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmcls version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmcls_version=__version__,
                                          config=cfg.pretty_text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=(not args.no_validate),
                timestamp=timestamp,
                meta=meta)
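
# The `--autoscale-lr` branch above applies the linear scaling rule from
# Goyal et al. (https://arxiv.org/abs/1706.02677): the learning rate is
# scaled in proportion to the effective batch size. A minimal standalone
# sketch of the same arithmetic, assuming the base lr was tuned for the
# default of 8 GPUs x 32 samples per GPU; `_sketch_autoscale_lr` is a
# hypothetical helper for illustration, not part of the script above.
def _sketch_autoscale_lr(base_lr, num_gpus, samples_per_gpu,
                         base_gpus=8, base_samples_per_gpu=32):
    """Scale `base_lr` linearly with the effective batch size."""
    effective_batch = num_gpus * samples_per_gpu
    base_batch = base_gpus * base_samples_per_gpu
    return base_lr * effective_batch / base_batch


def _demo_autoscale_lr():
    # 4 GPUs x 64 samples is still a 256-sample batch, so lr is unchanged;
    # 16 GPUs x 32 samples doubles the batch, so lr doubles.
    assert _sketch_autoscale_lr(0.1, 4, 64) == 0.1
    assert _sketch_autoscale_lr(0.1, 16, 32) == 0.2
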
def main():
    print("--------> 1")
    args = parse_args()
    print("--------> 2")

    cfg = Config.fromfile(args.config)
    print("--------> 3")
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    print("--------> 4")

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    print("--------> 5")

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)
    print("--------> 6")

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    print("--------> 7")

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    cfg_pretty_text = cfg.pretty_text
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg_pretty_text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_classifier(cfg.model)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmcls version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmcls_version=__version__,
                                          config=cfg_pretty_text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=(not args.no_validate),
                timestamp=timestamp,
                device='cpu' if args.device == 'cpu' else 'cuda',
                meta=meta)
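
# Every `main()` variant in this file leans on a `parse_args()` defined
# elsewhere in the script. A minimal sketch of what that parser must provide
# for the variant above, with flag names inferred from the `args.*` accesses
# (the real parser likely defines more options and richer help text), shown
# under the name `_sketch_parse_args` so it does not shadow the real function:
import argparse

from mmcv import DictAction


def _sketch_parse_args():
    parser = argparse.ArgumentParser(description='Train a classifier')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='dir to save logs and models')
    parser.add_argument('--resume-from', help='checkpoint to resume from')
    parser.add_argument('--no-validate', action='store_true',
                        help='skip evaluation during training')
    parser.add_argument('--gpus', type=int,
                        help='number of gpus to use (non-distributed)')
    parser.add_argument('--gpu-ids', type=int, nargs='+',
                        help='ids of gpus to use (non-distributed)')
    parser.add_argument('--device', default='cuda', help="'cuda' or 'cpu'")
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument('--deterministic', action='store_true',
                        help='use deterministic cudnn algorithms')
    parser.add_argument('--options', nargs='+', action=DictAction,
                        help='override settings in the used config')
    parser.add_argument('--launcher', default='none',
                        choices=['none', 'pytorch', 'slurm', 'mpi'],
                        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    return parser.parse_args()
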
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    if args.net_params:
        tag, input_channels, block1, block2, block3, block4, last_channel = \
            args.net_params.split('-')
        input_channels = [int(item) for item in input_channels.split('_')]
        block1 = [int(item) for item in block1.split('_')]
        block2 = [int(item) for item in block2.split('_')]
        block3 = [int(item) for item in block3.split('_')]
        block4 = [int(item) for item in block4.split('_')]
        last_channel = int(last_channel)

        inverted_residual_setting = []
        for item in [block1, block2, block3, block4]:
            for _ in range(item[0]):
                inverted_residual_setting.append(
                    [item[1],
                     item[2:-int(len(item) / 2 - 1)],
                     item[-int(len(item) / 2 - 1):]])

        cfg.model.backbone.input_channel = input_channels
        cfg.model.backbone.inverted_residual_setting = inverted_residual_setting
        cfg.model.backbone.last_channel = last_channel
        cfg.model.head.in_channels = last_channel

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    if args.net_params:
        log_file = osp.join(cfg.work_dir, f'{args.net_params}.log')
    else:
        log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_classifier(cfg.model)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmcls version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmcls_version=__version__,
            config=cfg.pretty_text,
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)
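
# `--net-params` above packs a searched MobileNet-style backbone into one
# string of seven dash-separated fields:
#     tag-input_channels-block1-block2-block3-block4-last_channel
# where every field after `tag` is a list of underscore-separated integers.
# A worked example of the block-descriptor slicing the loop above performs
# (the concrete numbers are illustrative, not a real searched architecture):
def _demo_net_params_slicing():
    item = [2, 1, 24, 32, 3, 6]    # hypothetical 6-int block descriptor
    half = int(len(item) / 2 - 1)  # == 2 for a 6-element descriptor
    entry = [item[1], item[2:-half], item[-half:]]
    assert entry == [1, [24, 32], [3, 6]]
    # item[0] == 2, so the loop above would append this
    # [item[1], middle, tail] triple to `inverted_residual_setting` twice.
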
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed

    model = build_classifier(cfg.model)
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))

    # save mmcls version, config file content and class names in
    # runner as meta data
    meta.update(
        dict(mmcls_version=__version__,
             config=cfg.pretty_text,
             CLASSES=datasets[0].CLASSES))

    # add an attribute for visualization convenience
    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=(not args.no_validate),
                timestamp=timestamp,
                device='cpu' if args.device == 'cpu' else 'cuda',
                meta=meta)
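
# The variant above delegates seed selection to `init_random_seed`. In
# distributed training that helper has to return the same value on every
# rank, otherwise data shuffling diverges across processes. A minimal sketch
# of how such a helper typically works in mm-series codebases (rank 0 draws
# a seed and broadcasts it); treat this as an illustration under that
# assumption, not the exact mmcls implementation:
import numpy as np
import torch
import torch.distributed as dist
from mmcv.runner import get_dist_info


def _sketch_init_random_seed(seed=None, device='cuda'):
    if seed is not None:  # an explicit --seed always wins
        return seed
    rank, world_size = get_dist_info()
    seed = np.random.randint(2**31)
    if world_size == 1:
        return seed
    # share rank 0's seed with every other process
    tensor = torch.tensor(seed if rank == 0 else 0,
                          dtype=torch.int32, device=device)
    dist.broadcast(tensor, src=0)
    return tensor.item()
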
def train_single_fold(args, cfg, fold, distributed, seed):
    # create the work_dir for the fold
    work_dir = osp.join(cfg.work_dir, f'fold{fold}')
    cfg.work_dir = work_dir

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # wrap the dataset cfg
    train_dataset = dict(
        type='KFoldDataset',
        fold=fold,
        dataset=cfg.data.train,
        num_splits=args.num_splits,
        seed=seed,
    )
    val_dataset = dict(
        type='KFoldDataset',
        fold=fold,
        # Use the same dataset with training.
        dataset=copy.deepcopy(cfg.data.train),
        num_splits=args.num_splits,
        seed=seed,
        test_mode=True,
    )
    val_dataset['dataset']['pipeline'] = cfg.data.val.pipeline
    cfg.data.train = train_dataset
    cfg.data.val = val_dataset
    cfg.data.test = val_dataset

    # dump config
    stem, suffix = osp.basename(args.config).rsplit('.', 1)
    cfg.dump(osp.join(cfg.work_dir, f'{stem}_fold{fold}.{suffix}'))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    logger.info(
        f'-------- Cross-validation: [{fold+1}/{args.num_splits}] -------- ')

    # set random seeds
    # Use a different seed in different folds
    logger.info(f'Set random seed to {seed + fold}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed + fold, deterministic=args.deterministic)
    cfg.seed = seed + fold
    meta['seed'] = seed + fold

    model = build_classifier(cfg.model)
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))

    meta.update(
        dict(mmcls_version=__version__,
             config=cfg.pretty_text,
             CLASSES=datasets[0].CLASSES,
             kfold=dict(fold=fold, num_splits=args.num_splits)))

    # add an attribute for visualization convenience
    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=(not args.no_validate),
                timestamp=timestamp,
                device='cpu' if args.device == 'cpu' else 'cuda',
                meta=meta)
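
# `train_single_fold` trains one split; the surrounding script is expected to
# call it once per fold. A minimal driver sketch under that assumption
# (`_sketch_kfold_main` is hypothetical; it reuses `parse_args`, `Config`,
# `init_dist` and `init_random_seed` from this script and assumes the parser
# exposes `--num-splits`). The deepcopy matters: the function mutates
# `cfg.work_dir` and `cfg.data` in place, so each fold needs a fresh copy.
import copy


def _sketch_kfold_main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)
    seed = init_random_seed(args.seed)
    for fold in range(args.num_splits):
        # give each fold a fresh config so per-fold mutations don't leak
        train_single_fold(args, copy.deepcopy(cfg), fold, distributed, seed)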