def MMdet_train(self, config):  ### only train and cancel validate
    # create work_dir
    # dump config
    config.dump(osp.join(config.work_dir, osp.basename(self.cfg_path)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(config.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=config.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    meta['config'] = config.pretty_text
    # log some basic info
    logger.info('Distributed training: False')
    logger.info(f'Config:\n{config.pretty_text}')
    # set random seeds
    seed = None
    config.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(self.cfg_path)
    model = build_detector(config.model, train_cfg=config.get('train_cfg'),
                           test_cfg=config.get('test_cfg'))
    datasets = [build_dataset(config.data.train)]
    if config.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        config.checkpoint_config.meta = dict(mmdet_version=__version__ + get_git_hash()[:7],
                                             CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model, datasets, config, distributed=None, validate=self.valid,
                   timestamp=timestamp, meta=meta)
def __init__(self, mode):
    self.config = patch_config.patch_configs[mode]()
    self.args = parse_args()
    cfg = Config.fromfile(self.args.config)
    cfg.data.samples_per_gpu = 1
    if self.args.options is not None:
        cfg.merge_from_dict(self.args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_dir is determined in this priority: CLI > segment in file > filename
    if self.args.work_dir is not None:
        if not os.path.exists(self.args.work_dir):
            os.makedirs(self.args.work_dir)
        if self.args.clear_work_dir:
            file_list = os.listdir(self.args.work_dir)
            for f in file_list:
                if os.path.isdir(os.path.join(self.args.work_dir, f)):
                    shutil.rmtree(os.path.join(self.args.work_dir, f))
                else:
                    os.remove(os.path.join(self.args.work_dir, f))
        # update configs according to CLI args if work_dir is given
        cfg.work_dir = self.args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(self.args.config))[0])
    if self.args.resume_from is not None:
        cfg.resume_from = self.args.resume_from
    if self.args.gpu_ids is not None:
        cfg.gpu_ids = self.args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if self.args.gpus is None else range(self.args.gpus)
    if self.args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
    # init distributed env first, since logger depends on the dist info.
    if self.args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(self.args.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(self.args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if self.args.seed is not None:
        logger.info(f'Set random seed to {self.args.seed}, '
                    f'deterministic: {self.args.deterministic}')
        set_random_seed(self.args.seed, deterministic=self.args.deterministic)
    cfg.seed = self.args.seed
    meta['seed'] = self.args.seed
    self.model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    self.model = MMDataParallel(self.model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
    # YOLOv4
    self.darknet_model = Darknet(self.config.cfgfile)
    self.darknet_model.load_weights(self.config.weightfile)
    self.darknet_model = self.darknet_model.eval().cuda()  # TODO: Why eval?
    self.datasets = [build_dataset(cfg.data.train)]
    self.data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in self.datasets
    ]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        self.datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.pretty_text,
                                          CLASSES=self.datasets[0].CLASSES)
    # add an attribute for visualization convenience
    self.model.CLASSES = self.datasets[0].CLASSES
    self.patch_applier = PatchApplier().cuda()
    self.patch_transformer = PatchTransformer().cuda()
    self.prob_extractor = MaxProbExtractor(0, 80, self.config).cuda()
    self.nps_calculator = NPSCalculator(self.config.printfile, self.config.patch_size).cuda()
    self.total_variation = TotalVariation().cuda()
    self.writer = self.init_tensorboard(mode)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    def replace_dataroot(obj, name_list=['ann_file', 'img_prefix', 'class_imgID_file']):
        for name in name_list:
            path = getattr(obj, name, None)
            if path is not None:
                if isinstance(path, list):
                    path = [p.replace(cfg.data_root, args.data_root) for p in path]
                else:
                    path = path.replace(cfg.data_root, args.data_root)
                setattr(obj, name, path)
        return obj

    # update work_dir and data_root
    if args.work_dir:
        cfg.work_dir = args.work_dir
    if args.data_root:
        if cfg.data.train.type == 'RepeatDataset':
            cfg.data.train.dataset = replace_dataroot(cfg.data.train.dataset)
        else:
            cfg.data.train = replace_dataroot(cfg.data.train)
        cfg.data.val = replace_dataroot(cfg.data.val)
        cfg.data.test = replace_dataroot(cfg.data.test)
        cfg.data_root = args.data_root
    if args.load_from:
        cfg.load_from = args.load_from
    cfg.gpus = args.gpus
    # copy config file to work_dir
    os.makedirs(cfg.work_dir, exist_ok=True)
    os.system("cp %s %s" % (args.config, cfg.work_dir))
    # if args.autoscale_lr:
    #     # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
    #     cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join(['{}: {}'.format(k, v) for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config:\n{}'.format(cfg.text))
    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model, datasets, cfg, distributed=distributed, validate=args.validate,
                   timestamp=timestamp, meta=meta)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    if args.cross_validate:
        # make sure all four split annotation files exist before training
        for i in range(4):
            if os.path.exists(cfg.data.train.ann_file.replace('re_', '{}_split_'.format(str(i + 1)))):
                continue
            else:
                raise FileNotFoundError(
                    cfg.data.train.ann_file.replace('re_', '{}_split_'.format(str(i + 1))))
    if args.left_parameters is not None:
        cfg = merge_from_list(cfg, args.left_parameters)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = (cfg.optimizer['lr'] * cfg.gpus / 8) * cfg.data.imgs_per_gpu / 2
    # init distributed env first, since logger depends on the dist info.
    if args.cross_validate:
        if args.launcher == 'none':
            distributed = False
        else:
            distributed = True
            init_dist(args.launcher, **cfg.dist_params)
        original_ann = cfg.data.train.ann_file
        original_output = cfg.work_dir
        for i in range(0, 4):
            now_ann = original_ann.replace('re_', '{}_split_'.format(str(i + 1)))
            now_output = os.path.join(original_output, '{}_split'.format(str(i + 1)))
            cfg.data.train.ann_file = now_ann
            cfg.work_dir = now_output
            # create work_dir
            mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
            # init the logger before other steps
            timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
            log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
            logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
            # init the meta dict to record some important information such as
            # environment info and seed, which will be logged
            meta = dict()
            # log env info
            env_info_dict = collect_env()
            env_info = '\n'.join(['{}: {}'.format(k, v) for k, v in env_info_dict.items()])
            dash_line = '-' * 60 + '\n'
            logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
            meta['env_info'] = env_info
            # log some basic info
            logger.info('Distributed training: {}'.format(distributed))
            logger.info('Config:\n{}'.format(cfg.text))
            # set random seeds
            if args.seed is not None:
                logger.info('Set random seed to {}, deterministic: {}'.format(
                    args.seed, args.deterministic))
                set_random_seed(args.seed, deterministic=args.deterministic)
            cfg.seed = args.seed
            meta['seed'] = args.seed
            model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
            datasets = [build_dataset(cfg.data.train)]
            if len(cfg.workflow) == 2:
                val_dataset = copy.deepcopy(cfg.data.val)
                val_dataset.pipeline = cfg.data.train.pipeline
                datasets.append(build_dataset(val_dataset))
            if cfg.checkpoint_config is not None:
                # save mmdet version, config file content and class names in
                # checkpoints as meta data
                cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.text,
                                                  CLASSES=datasets[0].CLASSES)
            # add an attribute for visualization convenience
            model.CLASSES = datasets[0].CLASSES
            train_detector(model, datasets, cfg, distributed=distributed, validate=args.validate,
                           timestamp=timestamp, meta=meta)
            if args.clear_output:
                # keep only the final checkpoint of each split
                max_epochs = cfg.total_epochs
                files = glob.glob(now_output + '/epoch_*.pth')
                for file in files:
                    e = file.split('_')[-1]
                    e = int(e.split('.')[0])
                    if e < max_epochs:
                        os.remove(file)
    else:
        if args.launcher == 'none':
            distributed = False
        else:
            distributed = True
            init_dist(args.launcher, **cfg.dist_params)
        # create work_dir
        mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
        # init the logger before other steps
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
        logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
        # init the meta dict to record some important information such as
        # environment info and seed, which will be logged
        meta = dict()
        # log env info
        env_info_dict = collect_env()
        env_info = '\n'.join(['{}: {}'.format(k, v) for k, v in env_info_dict.items()])
        dash_line = '-' * 60 + '\n'
        logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
        meta['env_info'] = env_info
        # log some basic info
        logger.info('Distributed training: {}'.format(distributed))
        logger.info('Config:\n{}'.format(cfg.text))
        # set random seeds
        if args.seed is not None:
            logger.info('Set random seed to {}, deterministic: {}'.format(
                args.seed, args.deterministic))
            set_random_seed(args.seed, deterministic=args.deterministic)
        cfg.seed = args.seed
        meta['seed'] = args.seed
        model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
        datasets = [build_dataset(cfg.data.train)]
        if len(cfg.workflow) == 2:
            val_dataset = copy.deepcopy(cfg.data.val)
            val_dataset.pipeline = cfg.data.train.pipeline
            datasets.append(build_dataset(val_dataset))
        if cfg.checkpoint_config is not None:
            # save mmdet version, config file content and class names in
            # checkpoints as meta data
            cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.text,
                                              CLASSES=datasets[0].CLASSES)
        # add an attribute for visualization convenience
        model.CLASSES = datasets[0].CLASSES
        train_detector(model, datasets, cfg, distributed=distributed, validate=args.validate,
                       timestamp=timestamp, meta=meta)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['exp_name'] = osp.basename(args.config)
    # build the model
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    datasets = [build_dataset(cfg.data.train)]
    # validation set
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + get_git_hash()[:7],
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model, datasets, cfg, distributed=distributed,
                   validate=(not args.no_validate), timestamp=timestamp, meta=meta)
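# ---------------------------------------------------------------------------
# Context note: every entry point above calls a parse_args() helper that is
# not shown in this collection. The sketch below is a hypothetical minimal
# version modelled on MMDetection's tools/train.py; the exact flag set
# (e.g. --validate vs --no-validate, --options vs --cfg-options,
# --autoscale-lr, --work-directory) varies between the variants above, so
# treat every flag here as an assumption, not the authors' actual code.
# ---------------------------------------------------------------------------
import argparse

from mmcv import DictAction


def parse_args(args=None):
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument('--resume-from', help='the checkpoint file to resume from')
    parser.add_argument('--no-validate', action='store_true',
                        help='do not evaluate checkpoints during training')
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument('--gpus', type=int, help='number of gpus to use')
    group_gpus.add_argument('--gpu-ids', type=int, nargs='+', help='ids of gpus to use')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument('--deterministic', action='store_true',
                        help='set deterministic options for the CUDNN backend')
    parser.add_argument('--options', nargs='+', action=DictAction,
                        help='override some settings in the used config')
    parser.add_argument('--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'],
                        default='none', help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    # parse_args(None) falls back to sys.argv, matching the call sites above
    return parser.parse_args(args)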
def main():
    args = parse_args()
    # print(args)
    cfg = Config.fromfile(args.config)
    # print(cfg)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        # print("set cudnn_benchmark")
        torch.backends.cudnn.benchmark = True
    # set work_dir
    if args.work_dir is not None:
        # print("set work_dir")
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is not set
        cfg.work_dir = os.path.join('./work_dirs', os.path.splitext(os.path.basename(args.config))[0])
    # set resume_from
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    # set gpu
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    # set distributed env
    if args.launcher == 'none':
        distributed = False
    else:
        # single GPU is actually the default here; setting gpu_ids directly would be
        # enough and world_size could probably be omitted
        distributed = True
        # init_dist(args.launcher, **cfg.dist_params)
        _, world_size = get_dist_info()
        # print(world_size)
        cfg.gpu_ids = range(world_size)
    # create work_dir
    mmcv.mkdir_or_exist(os.path.abspath(cfg.work_dir))
    # dump config
    cfg.dump(os.path.join(cfg.work_dir, os.path.basename(args.config)))
    # init logger
    timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    log_file = os.path.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict with things to be logged
    meta = dict()
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
        cfg.seed = args.seed
        meta['seed'] = args.seed
    meta['exp_name'] = os.path.basename(args.config)
    model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
    datasets = [build_dataset(cfg.data.train)]
    if cfg.checkpoint_config is not None:
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + get_git_hash()[:7],
                                          CLASSES=datasets[0].CLASSES)
    # model.CLASSES = datasets[0].CLASSES
    print(model)
    train_detector(model, datasets, cfg, distributed=distributed,
                   validate=(not args.no_validate), meta=meta, timestamp=timestamp)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if not os.path.exists(cfg.work_dir):
        os.makedirs(cfg.work_dir)
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    train_dataset = get_dataset(cfg.data.train)
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.text,
                                          CLASSES=train_dataset.CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = train_dataset.CLASSES
    train_detector(model, train_dataset, cfg, distributed=distributed, validate=args.validate)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_directory is determined in this priority: CLI > segment in file > filename
    if args.work_directory is not None:
        # update configs according to CLI args if args.work_directory is not None
        cfg.work_directory = args.work_directory
    elif cfg.get('work_directory', None) is None:
        # use config filename as default work_directory if cfg.work_directory is None
        cfg.work_directory = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # create work_directory
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_directory))
    # dump config
    cfg.dump(osp.join(cfg.work_directory, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_directory, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # ---------- MI-AOD Training and Test Start Here ---------- #
    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
        cfg.seed = args.seed
        meta['seed'] = args.seed
    X_L, X_U, X_all, all_anns = get_X_L_0(cfg)
    # # load set and model
    # # Please change it to the timestamp directory which you want to load data from.
    # last_timestamp = '/20201013_154728'
    # # Please change it to the cycle which you want to load data from.
    # load_cycle = 0
    # X_L = np.load(cfg.work_directory + last_timestamp + '/X_L_' + str(load_cycle) + '.npy')
    # X_U = np.load(cfg.work_directory + last_timestamp + '/X_U_' + str(load_cycle) + '.npy')
    # cfg.cycles = list(range(load_cycle, 7))
    cfg.work_directory = cfg.work_directory + '/' + timestamp
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_directory))
    np.save(cfg.work_directory + '/X_L_' + '0' + '.npy', X_L)
    np.save(cfg.work_directory + '/X_U_' + '0' + '.npy', X_U)
    initial_step = cfg.lr_config.step
    for cycle in cfg.cycles:
        # set random seeds
        if args.seed is not None:
            logger.info(f'Set random seed to {args.seed}, deterministic: {args.deterministic}')
            set_random_seed(args.seed, deterministic=args.deterministic)
            cfg.seed = args.seed
            meta['seed'] = args.seed
        # get the config of the labeled dataset
        cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
        # load model
        model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
        # # Please change it to the epoch which you want to load model at.
        # model_file_name = '/latest.pth'
        # model.load_state_dict(torch.load(cfg.work_directory[:16] + last_timestamp + model_file_name)['state_dict'])
        # load dataset
        datasets = [build_dataset(cfg.data.train)]
        if len(cfg.workflow) == 2:
            val_dataset = copy.deepcopy(cfg.data.val)
            val_dataset.pipeline = cfg.data.train.pipeline
            datasets.append(build_dataset(val_dataset))
        if cfg.checkpoint_config is not None and cycle == 0:
            # save mmdet version, config file content and class names in
            # checkpoints as meta data
            cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + get_git_hash()[:7],
                                              config=cfg.pretty_text,
                                              CLASSES=datasets[0].CLASSES)
        model.CLASSES = datasets[0].CLASSES
        for epoch in range(cfg.epoch):
            # Only in the last epochs does the learning rate need to be reduced
            # and the model need to be evaluated.
            if epoch == cfg.epoch - 1:
                cfg.lr_config.step = initial_step
                cfg.evaluation.interval = cfg.epoch_ratio[0]
            else:
                cfg.lr_config.step = [1000]
                cfg.evaluation.interval = 100
            # ---------- Label Set Training ----------
            if epoch == 0:
                cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
                datasets = [build_dataset(cfg.data.train)]
                losstype.update_vars(0)
                cfg.total_epochs = cfg.epoch_ratio[0]
                cfg_bak = cfg.deepcopy()
                time.sleep(2)
                for name, value in model.named_parameters():
                    value.requires_grad = True
                train_detector(model, datasets, cfg, distributed=distributed,
                               validate=(not args.no_validate), timestamp=timestamp, meta=meta)
                cfg = cfg_bak
            # ---------- Re-weighting and Minimizing Instance Uncertainty ----------
            cfg_u = create_X_U_file(cfg.deepcopy(), X_U, all_anns, cycle)
            cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
            datasets_u = [build_dataset(cfg_u.data.train)]
            datasets = [build_dataset(cfg.data.train)]
            losstype.update_vars(1)
            cfg_u.total_epochs = cfg_u.epoch_ratio[1]
            cfg.total_epochs = cfg.epoch_ratio[1]
            cfg_u_bak = cfg_u.deepcopy()
            cfg_bak = cfg.deepcopy()
            time.sleep(2)
            for name, value in model.named_parameters():
                if name in cfg.theta_f_1:
                    value.requires_grad = False
                elif name in cfg.theta_f_2:
                    value.requires_grad = False
                else:
                    value.requires_grad = True
            train_detector(model, [datasets, datasets_u], [cfg, cfg_u], distributed=distributed,
                           validate=(not args.no_validate), timestamp=timestamp, meta=meta)
            cfg_u = cfg_u_bak
            cfg = cfg_bak
            # ---------- Re-weighting and Maximizing Instance Uncertainty ----------
            cfg_u = create_X_U_file(cfg.deepcopy(), X_U, all_anns, cycle)
            cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
            datasets_u = [build_dataset(cfg_u.data.train)]
            datasets = [build_dataset(cfg.data.train)]
            losstype.update_vars(2)
            cfg_u.total_epochs = cfg_u.epoch_ratio[1]
            cfg.total_epochs = cfg.epoch_ratio[1]
            cfg_u_bak = cfg_u.deepcopy()
            cfg_bak = cfg.deepcopy()
            time.sleep(2)
            for name, value in model.named_parameters():
                if name in cfg.theta_f_1:
                    value.requires_grad = True
                elif name in cfg.theta_f_2:
                    value.requires_grad = True
                else:
                    value.requires_grad = False
            train_detector(model, [datasets, datasets_u], [cfg, cfg_u], distributed=distributed,
                           validate=(not args.no_validate), timestamp=timestamp, meta=meta)
            cfg_u = cfg_u_bak
            cfg = cfg_bak
            # ---------- Label Set Training ----------
            cfg = create_X_L_file(cfg, X_L, all_anns, cycle)
            datasets = [build_dataset(cfg.data.train)]
            losstype.update_vars(0)
            cfg.total_epochs = cfg.epoch_ratio[0]
            cfg_bak = cfg.deepcopy()
            for name, value in model.named_parameters():
                value.requires_grad = True
            time.sleep(2)
            train_detector(model, datasets, cfg, distributed=distributed,
                           validate=args.no_validate, timestamp=timestamp, meta=meta)
            cfg = cfg_bak
        # ---------- Informative Image Selection ----------
        if cycle != cfg.cycles[-1]:
            # get new labeled data
            dataset_al = build_dataset(cfg.data.test)
            data_loader = build_dataloader(dataset_al, samples_per_gpu=1,
                                           workers_per_gpu=cfg.data.workers_per_gpu,
                                           dist=False, shuffle=False)
            # set random seeds
            if args.seed is not None:
                logger.info(f'Set random seed to {args.seed}, deterministic: {args.deterministic}')
                set_random_seed(args.seed, deterministic=args.deterministic)
                cfg.seed = args.seed
                meta['seed'] = args.seed
            uncertainty = calculate_uncertainty(cfg, model, data_loader, return_box=False)
            # update labeled set
            X_L, X_U = update_X_L(uncertainty, X_all, X_L, cfg.X_S_size)
            # save set and model
            np.save(cfg.work_directory + '/X_L_' + str(cycle + 1) + '.npy', X_L)
            np.save(cfg.work_directory + '/X_U_' + str(cycle + 1) + '.npy', X_U)
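# ---------------------------------------------------------------------------
# Reference note: every script in this collection calls set_random_seed(...)
# before building the model. The sketch below shows what this helper does in
# recent MMDetection releases (mmdet.apis.set_random_seed); it is included
# here only as a self-contained reference, not as part of the scripts above.
# ---------------------------------------------------------------------------
import random

import numpy as np
import torch


def set_random_seed(seed, deterministic=False):
    """Seed the Python, NumPy and PyTorch RNGs; optionally force cuDNN determinism."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        # deterministic kernels are slower but make runs reproducible
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False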
def train_py(self, runstate):
    # manuvision config
    mv_config_file = "ainnovision_train.yaml"
    mv_config_path = os.path.join(self.py_dir, mv_config_file)
    mvcfg = Config.fromfile(mv_config_path)
    # mmseg config
    # mm_config_path = '../configs/pspnet/pspnet_r50-d8_yantai_st12.py'
    mm_config_file = "mm_det.py"
    mm_config_path = os.path.join(self.py_dir, mm_config_file)
    mmcfg = Config.fromfile(mm_config_path)
    cfg = merge_to_mmcfg_from_mvcfg(mmcfg, mvcfg)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # init distributed env first, since logger depends on the dist info.
    if cfg.get('launcher', 'none') == 'none' or len(cfg.gpu_ids) == 1:
        distributed = False
    else:
        distributed = True
        init_dist(cfg.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    cfg.trainer_csv_path = osp.join(cfg.data_root, 'train_log.csv')
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(mm_config_path)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, 'train.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    cfg.seed = cfg.get('seed', None)
    if cfg.seed is not None:
        logger.info(f'Set random seed to {cfg.seed}, deterministic: {cfg.deterministic}')
        set_random_seed(cfg.seed, deterministic=cfg.deterministic)
    meta['seed'] = cfg.seed
    meta['exp_name'] = osp.basename(mm_config_path)
    # validate
    cfg.validate = cfg.get('validate', True)
    model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
    logger.info(model)
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.val.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmseg version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmseg_version=f'{__version__}+{get_git_hash()[:7]}',
            config=cfg,
            CLASSES=datasets[0].CLASSES,
            # PALETTE=datasets[0].PALETTE,
        )
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    trainer_detector(model, datasets, cfg, distributed=distributed, validate=cfg.validate,
                     timestamp=timestamp, meta=meta, runstate=runstate)
def main():
    settings = [
        config_path,
        # '--gpu-ids', '0' '1',
        '--gpus', str(len(available_gpu_ids)),
        '--validate',
    ]
    # args = parse_args(settings)
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # cfg.model.pretrained = '/home/yanqing/data/pretrained_model/mmdetection/resnet50_msra-5891d200.pth'
    # cfg.evaluation['interval_iter'] = 1
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        # cfg.work_dir = osp.join('../work_dirs', osp.splitext(osp.basename(args.config))[0])
        from pathlib import Path
        p = Path(args.config)
        path_1 = p.parent.name
        path_2 = p.stem
        # cfg.work_dir = osp.join('../work_dirs', f'{path_1}/{path_2}')
        cfg.work_dir = osp.join('/fengyouliang/model_output/work_dirs', f'{path_1}/{path_2}')
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.pretty_text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model, datasets, cfg, distributed=distributed, validate=args.validate,
                   timestamp=timestamp, meta=meta)
def Train(self):
    self.setup()
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(self.system_dict["local"]["cfg"].work_dir))
    # dump config
    self.system_dict["local"]["cfg"].dump(
        osp.join(self.system_dict["local"]["cfg"].work_dir,
                 osp.basename(self.system_dict["params"]["config"])))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(self.system_dict["local"]["cfg"].work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file,
                             log_level=self.system_dict["local"]["cfg"].log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {self.system_dict["local"]["distributed"]}')
    logger.info(f'Config:\n{self.system_dict["local"]["cfg"].pretty_text}')
    # set random seeds
    if self.system_dict["params"]["seed"] is not None:
        logger.info(f'Set random seed to {self.system_dict["params"]["seed"]}, '
                    f'deterministic: {self.system_dict["params"]["deterministic"]}')
        set_random_seed(self.system_dict["params"]["seed"],
                        deterministic=self.system_dict["params"]["deterministic"])
        self.system_dict["local"]["cfg"].seed = self.system_dict["params"]["seed"]
        meta['seed'] = self.system_dict["params"]["seed"]
    model = build_detector(self.system_dict["local"]["cfg"].model,
                           train_cfg=self.system_dict["local"]["cfg"].train_cfg,
                           test_cfg=self.system_dict["local"]["cfg"].test_cfg)
    datasets = [build_dataset(self.system_dict["local"]["cfg"].data.train)]
    if len(self.system_dict["local"]["cfg"].workflow) == 2:
        val_dataset = copy.deepcopy(self.system_dict["local"]["cfg"].data.val)
        val_dataset.pipeline = self.system_dict["local"]["cfg"].data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if self.system_dict["local"]["cfg"].checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        self.system_dict["local"]["cfg"].checkpoint_config.meta = dict(
            mmdet_version=__version__ + get_git_hash()[:7],
            config=self.system_dict["local"]["cfg"].pretty_text,
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    print("Classes to be trained: {}".format(model.CLASSES))
    train_detector(model, datasets, self.system_dict["local"]["cfg"],
                   distributed=self.system_dict["local"]["distributed"],
                   validate=(not self.system_dict["params"]["no_validate"]),
                   timestamp=timestamp, meta=meta)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
    # create config file in workdir
    work_dir = osp.abspath(cfg.work_dir)
    if os.path.exists(work_dir):
        print("Error: workdir path already exists")
        return
    mmcv.mkdir_or_exist(work_dir)
    config_name = osp.join(
        work_dir, '{}.py'.format(osp.splitext(osp.basename(args.config))[0] +
                                 time.strftime('_%Y%m%d_%H%M%S', time.localtime(time.time()))))
    shutil.copy(args.config, config_name)
    # with open(config_name, 'w+') as f:
    #     f.write(cfg.dump())
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
    log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    if cfg.get('version', None) is not None:
        env_info = dict(config_path=config_name, log_path=osp.abspath(log_file + '.json'))
        with open('env_info.json', 'w+') as f:
            json.dump(env_info, f)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join(['{}: {}'.format(k, v) for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config:\n{}'.format(cfg.text))
    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    cfg.data.train['quiet'] = True
    datasets = [build_dataset(cfg.data.train)]
    # load anchors
    if isinstance(cfg.model, dict) and cfg.model.get('type', 'FasterRCNN') == 'MyFasterRCNN':
        anchors = dict()
        with open(os.path.join(cfg.work_dir, 'anchors.json'), 'r') as f:
            anchors = json.load(f)
        logger.info('loaded anchors: {}\n'.format(anchors))
        cfg.model['anchors'] = anchors
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    my_config.set('classes', model.CLASSES)
    train_detector(model, datasets, cfg, distributed=distributed, validate=args.validate,
                   timestamp=timestamp, meta=meta)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    cfg_samples_per_gpu = cfg.data.samples_per_gpu
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    if args.tensorboard_dir is not None:
        hooks = [hook for hook in cfg.log_config.hooks if hook.type == 'TensorboardLoggerHook']
        if hooks:
            hooks[0].log_dir = args.tensorboard_dir
        else:
            logger.warning('Failed to find TensorboardLoggerHook')
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    datasets = [build_dataset(cfg.data.train)]
    dataset_len_per_gpu = sum(len(dataset) for dataset in datasets)
    if distributed:
        dataset_len_per_gpu = dataset_len_per_gpu // get_dist_info()[1]
    assert dataset_len_per_gpu > 0
    if cfg.data.samples_per_gpu == 'auto':
        if torch.cuda.is_available():
            logger.info('Auto-selection of samples per gpu (batch size).')
            cfg.data.samples_per_gpu = determine_max_batch_size(cfg, distributed, dataset_len_per_gpu)
            logger.info(f'Auto selected batch size: {cfg.data.samples_per_gpu} {dataset_len_per_gpu}')
            cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
        else:
            logger.warning('Auto-selection of batch size is not implemented for CPU.')
            logger.warning('Setting batch size to value taken from configuration file.')
            cfg.data.samples_per_gpu = cfg_samples_per_gpu
    if dataset_len_per_gpu < cfg.data.samples_per_gpu:
        cfg.data.samples_per_gpu = dataset_len_per_gpu
        logger.warning(f'Decreased samples_per_gpu to: {cfg.data.samples_per_gpu} '
                       f'because of dataset length: {dataset_len_per_gpu} '
                       f'and gpus number: {get_dist_info()[1]}')
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.pretty_text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model, datasets, cfg, distributed=distributed,
                   validate=(not args.no_validate), timestamp=timestamp, meta=meta)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.load_from is not None:
        cfg.load_from = args.load_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # record pid
    if platform.system() == 'Windows' and args.record_pid:
        training_info_path = record_pid(timestamp, cfg.file_path)
    try:
        # log some basic info
        logger.info(f'Distributed training: {distributed}')
        logger.info(f'Config:\n{cfg.pretty_text}')
        # set random seeds
        if args.seed is not None:
            logger.info(f'Set random seed to {args.seed}, '
                        f'deterministic: {args.deterministic}')
            set_random_seed(args.seed, deterministic=args.deterministic)
        cfg.seed = args.seed
        meta['seed'] = args.seed
        model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
        datasets = [build_dataset(cfg.data.train)]
        if len(cfg.workflow) == 2:
            val_dataset = copy.deepcopy(cfg.data.val)
            val_dataset.pipeline = cfg.data.train.pipeline
            datasets.append(build_dataset(val_dataset))
        if cfg.checkpoint_config is not None:
            # save mmdet version, config file content and class names in
            # checkpoints as meta data
            cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.pretty_text,
                                              CLASSES=datasets[0].CLASSES)
        # add an attribute for visualization convenience
        model.CLASSES = datasets[0].CLASSES
        with open(os.path.join(cfg.work_dir, 'class_id_to_name.json'), 'w') as f:
            json.dump({str(class_id): class_name
                       for class_id, class_name in enumerate(datasets[0].CLASSES)}, f, indent=4)
        train_detector(model, datasets, cfg, distributed=distributed,
                       validate=(not args.no_validate), timestamp=timestamp, meta=meta)
    except Exception as e:
        # record the failure, then re-raise so the error is not silently swallowed
        if platform.system() == 'Windows' and args.record_pid:
            record_end_info(training_info_path, str(e))
        raise
    else:
        if platform.system() == 'Windows' and args.record_pid:
            record_end_info(training_info_path, 'EndNormal')
def main():
    # args = ['./DOTA_configs/DOTA_hbb/retinanet_r50_fpn_2x_dota.py',
    #         '--gpus', '1',
    #         '--no-validate',
    #         '--work-dir', './results/retinanet_hbb_tv']
    args = [
        './DOTA_configs/DOTA_obb/retinanet_r50_fpn_2x_dota.py',
        '--gpus', '1',
        '--no-validate',
        '--work-dir', './results/retinanet_obb_tv_ver1_cv2_no_trick'
    ]
    # args = ['./DOTA_configs/DOTA_obb/faster_rcnn_r50_fpn_1x_dota.py',
    #         '--gpus', '4',
    #         '--no-validate',
    #         '--work-dir', './results/faster_obb_tv_ver1_cv2_no_trick']
    # args = ['./DOTA_configs/DOTA_obb/faster_rcnn_InLD_r50_fpn_2x_dota.py',
    #         '--gpus', '8',
    #         '--no-validate',
    #         '--work-dir', './results/faster_obb_tv_ver1_cv2_InLD']
    # args = ['./DOTA_configs/DOTA_obb/s2anet_r50_fpn_1x_dota.py',
    #         './results/DOTA_s2anet_obb_tv/epoch_24.pth',
    #         '--out', './results/DOTA_s2anet_obb_tv/results.pkl',
    #         '--eval', 'bbox']
    # args = ['./DOTA_configs/DIOR/retinanet_r50_fpn_2x.py',
    #         '--gpus', '2',
    #         '--no-validate',
    #         '--work-dir', './results/retina_test']
    # args = ['./configs/detr/detr_r50_8x2_150e_coco.py',
    #         '--gpus', '4',
    #         # '--no-validate',
    #         '--work-dir', './results/detr_baseline']
    # args = ['./DOTA_configs/General_RS_hbb/detr_r50_8x2_150e.py',
    #         '--gpus', '4',
    #         '--no-validate',
    #         '--work-dir', './results/DIOR_detr_full']
    # args = ['./DOTA_configs/DOTA_obb/s2anet_r50_fpn_1x_dota.py',
    #         '--gpus', '1',
    #         '--no-validate',
    #         '--work-dir', './results/DOTA_s2anet_obb_tv']
    # args = ['./DOTA_configs/DOTA_obb/faster_rcnn_r50_fpn_1x_dota.py',
    #         '--gpus', '1',
    #         '--no-validate',
    #         '--work-dir', './results/DOTA_faster_obb_tv_1GPU_cv2_no_trick']
    # NOTE: this second assignment overrides the RetinaNet settings above.
    args = [
        './DOTA_configs/DOTA_obb/faster_rcnn_RoITrans_r50_fpn_1x_dota.py',
        '--gpus', '1',
        '--no-validate',
        '--work-dir', './results/DOTA_faster_rcnn_RoITrans_tv'
    ]
    # args = ['./DOTA_configs/DOTA_obb/faster_rcnn_r50_fpn_1x_dota.py',
    #         '--gpus', '1',
    #         '--no-validate',
    #         '--work-dir', './results/DOTA_faster_obb_tv_1GPU_cv2_no_trick']
    args = parse_args(args)
    print(args)
    cfg = Config.fromfile(args.config)
    if args.options is not None:
        cfg.merge_from_dict(args.options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    datasets = [build_dataset(cfg.data.train)]
    print(len(datasets[0]))
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.pretty_text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector_ad(model, datasets, cfg, distributed=False,
                      validate=(not args.no_validate), timestamp=timestamp, meta=meta)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # if work_dir is not specified, use the config filename as the work_dir path
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    # create work_dir
    mmdet.cv_core.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump the .py config into work_dir
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Config:\n{cfg.pretty_text}')
    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['exp_name'] = osp.basename(args.config)
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        # this "validation" workflow only computes losses on the val set; no evaluation is run
        val_dataset = copy.deepcopy(cfg.data.val)
        # the train pipeline is reused for validation here, which looks questionable. TODO
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model, datasets, cfg, validate=(not args.no_validate),
                   timestamp=timestamp, meta=meta)
def main():
    '''
    # 1. Set up and read all the configuration.
    :return:
    '''
    args = parse_args()

    # cfg: the parameters from the configs/*.py file
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([('{}: {}'.format(k, v))
                          for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    # log some basic info
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('Config:\n{}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    '''
    # 2. Build the model.
    The first argument, cfg.model, must contain a type naming a classic
    algorithm such as Faster R-CNN or Mask R-CNN.
    It also contains several parts, such as backbone, neck and head.
    The backbone carries depth, stage info and so on; for example ResNet-50
    corresponds to four repeated stages of 3, 4, 6 and 3 blocks.
    The neck is usually an FPN (feature pyramid network) and needs settings
    such as num_outs, the number of outputs (we will see this later).
    The head is the concrete top-level part: rpn_head, shared_head,
    bbox_head and so on.
    If anything is unclear, we can check it against one of the configs.
    The return value is an instance of a class; see the build function below.
    :return:
    '''
    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)

    '''
    # 3. Build the dataset (from the train field of the data dict in the
    configs/*.py file).
    :return:
    '''
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:  # whether to add a validation set
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    # open question: how cfg should be handled here
    # cfg: the parameters from the configs/*.py file
    '''
    # 4. Pass the model, datasets and config into the training function.
    :return:
    '''
    train_detector(model,
                   datasets,
                   cfg,
                   distributed=distributed,
                   validate=args.validate,
                   timestamp=timestamp,
                   meta=meta)
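# A small sketch (assumption: a standalone helper, not taken from the scripts
# above) of the linear scaling rule referenced by the `autoscale_lr` branch
# (https://arxiv.org/abs/1706.02677): MMDetection configs tune the base LR for
# 8 GPUs, so it is rescaled by num_gpus / 8.
def autoscale_lr(base_lr, num_gpus, base_num_gpus=8):
    """Linearly rescale the learning rate with the number of GPUs."""
    return base_lr * num_gpus / base_num_gpus


print(autoscale_lr(0.02, 2))   # 0.005  (8-GPU LR scaled down for 2 GPUs)
print(autoscale_lr(0.02, 16))  # 0.04   (and scaled up for 16 GPUs)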
def main(): args = parse_args() cfg = Config.fromfile(args.config) if args.options is not None: cfg.merge_from_dict(args.options) # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True # work_dir is determined in this priority: CLI > segment in file > filename if args.work_dir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = args.work_dir elif cfg.get('work_dir', None) is None: # use config filename as default work_dir if cfg.work_dir is None cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) if args.resume_from is not None: cfg.resume_from = args.resume_from if args.gpu_ids is not None: cfg.gpu_ids = args.gpu_ids else: cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) if args.autoscale_lr: # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 # init distributed env first, since logger depends on the dist info. if args.launcher == 'none': distributed = False else: distributed = True init_dist(args.launcher, **cfg.dist_params) # create work_dir mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) # init the logger before other steps timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) log_file = osp.join(cfg.work_dir, f'{timestamp}.log') logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) # init the meta dict to record some important information such as # environment info and seed, which will be logged meta = dict() # log env info env_info_dict = collect_env() env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) dash_line = '-' * 60 + '\n' logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line) meta['env_info'] = env_info # log some basic info logger.info(f'Distributed training: {distributed}') logger.info(f'Config:\n{cfg.pretty_text}') # set random seeds if args.seed is not None: logger.info(f'Set random seed to {args.seed}, ' f'deterministic: {args.deterministic}') set_random_seed(args.seed, deterministic=args.deterministic) cfg.seed = args.seed meta['seed'] = args.seed model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) from mmcv.runner import get_dist_info rank, world_size = get_dist_info() if rank == 0: print(model) print("Model have {} paramerters.".format( sum(x.numel() for x in model.parameters()) / 1e6)) if hasattr(model, 'backbone'): print("Model has {} backbone.".format( sum(x.numel() for x in model.backbone.parameters()) / 1e6)) if hasattr(model, 'neck'): print("Model has {} neck.".format( sum(x.numel() for x in model.neck.parameters()) / 1e6)) if hasattr(model, 'roi_head'): print("Model has {} bbox head.".format( sum(x.numel() for x in model.roi_head.bbox_head.parameters()) / 1e6)) if hasattr(model, 'bbox_head'): print("Model has {} bbox head.".format( sum(x.numel() for x in model.bbox_head.parameters()) / 1e6)) datasets = [build_dataset(cfg.data.train)] if len(cfg.workflow) == 2: val_dataset = copy.deepcopy(cfg.data.val) val_dataset.pipeline = cfg.data.train.pipeline datasets.append(build_dataset(val_dataset)) if cfg.checkpoint_config is not None: # save mmdet version, config file content and class names in # checkpoints as meta data cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.pretty_text, CLASSES=datasets[0].CLASSES) # add an attribute for visualization convenience model.CLASSES = datasets[0].CLASSES train_detector(model, datasets, cfg, 
distributed=distributed, validate=args.validate, timestamp=timestamp, meta=meta)
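# A toy sketch (assumption: a simplified dict-based "config", not mmdet's
# Config class) of what the `len(cfg.workflow) == 2` branch above does: when
# the workflow is [('train', 1), ('val', 1)], the val split is rebuilt with
# the *train* pipeline so the runner can compute losses on it between epochs
# (no evaluation is run here).
import copy

cfg = dict(
    workflow=[('train', 1), ('val', 1)],
    data=dict(
        train=dict(ann_file='train.json', pipeline=['LoadImage', 'RandomFlip']),
        val=dict(ann_file='val.json', pipeline=['LoadImage']),
    ),
)

datasets = [cfg['data']['train']]
if len(cfg['workflow']) == 2:
    val_dataset = copy.deepcopy(cfg['data']['val'])
    # note: the val split inherits the train pipeline, including augmentations
    val_dataset['pipeline'] = cfg['data']['train']['pipeline']
    datasets.append(val_dataset)

print(datasets[1])  # {'ann_file': 'val.json', 'pipeline': ['LoadImage', 'RandomFlip']}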
def maintrain(args): cfg = Config.fromfile(args.config) # New add to setup the dataset, no need to change the configuration file cfg.dataset_type = 'CocoDataset' cfg.data.test.type = 'CocoDataset' cfg.data.test.data_root = args.data_root cfg.data.test.ann_file = args.data_root + 'annotations_val20new.json' #'annotations_valallnew.json' cfg.data.test.img_prefix = '' cfg.data.train.type = 'CocoDataset' cfg.data.train.data_root = args.data_root cfg.data.train.ann_file = args.data_root + 'annotations_train20new.json' #'annotations_trainallnew.json' cfg.data.train.img_prefix = '' cfg.data.val.type = 'CocoDataset' cfg.data.val.data_root = args.data_root cfg.data.val.ann_file = args.data_root + 'annotations_val20new.json' #'annotations_valallnew.json' cfg.data.val.img_prefix = '' #batch size=2, workers=0, eta: 1 day, 5:56:54, memory: 5684 cfg.data.samples_per_gpu = 4 #batch size cfg.data.workers_per_gpu = 4 #eta: 1 day, 6:17:04, memory: 10234 # modify num classes of the model in box head cfg.model.roi_head.bbox_head.num_classes = len(args.classes) # 4 # import modules from string list. if cfg.get('custom_imports', None): #not used from mmcv.utils import import_modules_from_strings import_modules_from_strings(**cfg['custom_imports']) # set cudnn_benchmark, benchmark mode is good whenever your input sizes for your network do not vary. This way, cudnn will look for the optimal set of algorithms for that particular configuration (which takes some time). This usually leads to faster runtime. if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True # work_dir is determined in this priority: CLI > segment in file > filename if args.workdir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = args.workdir elif cfg.get('work_dir', None) is None: # use config filename as default work_dir if cfg.work_dir is None cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) # create work_dir mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) # dump config cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) # init the logger before other steps timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) log_file = osp.join(cfg.work_dir, f'{timestamp}.log') logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) cfg.load_from = args.checkpoint if args.resumefrom is not None: cfg.resume_from = args.resumefrom if args.gpuids is not None: cfg.gpu_ids = args.gpuids else: cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) # init the meta dict to record some important information such as # environment info and seed, which will be logged meta = dict() # log env info env_info_dict = collect_env() env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) dash_line = '-' * 60 + '\n' logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line) meta['env_info'] = env_info meta['config'] = cfg.pretty_text # log some basic info distributed = False logger.info(f'Distributed training: {distributed}') logger.info(f'Config:\n{cfg.pretty_text}') # set random seeds if args.seed is not None: #not used logger.info(f'Set random seed to {args.seed}, ' f'deterministic: {args.deterministic}') set_random_seed(args.seed, deterministic=args.deterministic) cfg.seed = args.seed meta['seed'] = args.seed meta['exp_name'] = osp.basename(args.config) model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg')) datasets = [build_dataset(cfg.data.train)] if 
len(cfg.workflow) == 2: val_dataset = copy.deepcopy(cfg.data.val) val_dataset.pipeline = cfg.data.train.pipeline datasets.append(build_dataset(val_dataset)) if cfg.checkpoint_config is not None: # save mmdet version, config file content and class names in # checkpoints as meta data cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + get_git_hash()[:7], CLASSES=datasets[0].CLASSES) # add an attribute for visualization convenience model.CLASSES = args.classes #datasets[0].CLASSES train_detector(model, datasets, cfg, distributed=distributed, validate=(not args.novalidate), timestamp=timestamp, meta=meta)
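# A hedged sketch (assumption: cfg behaves like mmcv's Config / a nested
# attribute dict; the paths, the helper name `patch_coco_cfg` and the class
# list are placeholders, not taken from `maintrain` above) of the same idea:
# override the dataset type, annotation files and the box-head class count in
# code instead of editing the config file.
def patch_coco_cfg(cfg, data_root, classes):
    """Point every split at COCO-style annotations and resize the bbox head."""
    for split, ann in (('train', 'annotations_train.json'),
                       ('val', 'annotations_val.json'),
                       ('test', 'annotations_val.json')):
        split_cfg = cfg.data[split]
        split_cfg.type = 'CocoDataset'
        split_cfg.data_root = data_root
        split_cfg.ann_file = data_root + ann
        split_cfg.img_prefix = ''
    # the detection head must predict exactly len(classes) foreground classes
    cfg.model.roi_head.bbox_head.num_classes = len(classes)
    return cfg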
def main():
    # parse the command-line arguments
    args = parse_args()
    # load the configuration from the given config file path
    cfg = Config.fromfile(args.config)
    # merge optional overrides
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark (cudnn autotuning can speed up training; see
    # https://blog.csdn.net/tuntunmmd/article/details/90229836)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    # whether to resume training from a checkpoint (note the difference
    # between resume_from and load_from)
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    # configure the GPUs
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump the current training config into work_dir
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['exp_name'] = osp.basename(args.config)

    # build the detector through the registry, passing the model config plus
    # the train and test configs
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    # build the dataset objects
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:  # controls how often weights are saved
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__ + get_git_hash()[:7],
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    # enter the training routine
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)
def main(): args = parse_args() cfg = Config.fromfile(args.config) # update data root according to MMDET_DATASETS update_data_root(cfg) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) # set multi-process settings setup_multi_processes(cfg) # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True # work_dir is determined in this priority: CLI > segment in file > filename if args.work_dir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = args.work_dir elif cfg.get('work_dir', None) is None: # use config filename as default work_dir if cfg.work_dir is None cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) if args.resume_from is not None: cfg.resume_from = args.resume_from cfg.auto_resume = args.auto_resume if args.gpus is not None: cfg.gpu_ids = range(1) warnings.warn('`--gpus` is deprecated because we only support ' 'single GPU mode in non-distributed training. ' 'Use `gpus=1` now.') if args.gpu_ids is not None: cfg.gpu_ids = args.gpu_ids[0:1] warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. ' 'Because we only support single GPU mode in ' 'non-distributed training. Use the first GPU ' 'in `gpu_ids` now.') if args.gpus is None and args.gpu_ids is None: cfg.gpu_ids = [args.gpu_id] # init distributed env first, since logger depends on the dist info. if args.launcher == 'none': distributed = False else: distributed = True init_dist(args.launcher, **cfg.dist_params) # re-set gpu_ids with distributed training mode _, world_size = get_dist_info() cfg.gpu_ids = range(world_size) # create work_dir mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) # dump config cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) # init the logger before other steps timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) log_file = osp.join(cfg.work_dir, f'{timestamp}.log') logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) # init the meta dict to record some important information such as # environment info and seed, which will be logged meta = dict() # log env info env_info_dict = collect_env() env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) dash_line = '-' * 60 + '\n' logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line) meta['env_info'] = env_info meta['config'] = cfg.pretty_text # log some basic info logger.info(f'Distributed training: {distributed}') logger.info(f'Config:\n{cfg.pretty_text}') # set random seeds seed = init_random_seed(args.seed) seed = seed + dist.get_rank() if args.diff_seed else seed logger.info(f'Set random seed to {seed}, ' f'deterministic: {args.deterministic}') set_random_seed(seed, deterministic=args.deterministic) cfg.seed = seed meta['seed'] = seed meta['exp_name'] = osp.basename(args.config) model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg')) model.init_weights() datasets = [build_dataset(cfg.data.train)] if len(cfg.workflow) == 2: val_dataset = copy.deepcopy(cfg.data.val) val_dataset.pipeline = cfg.data.train.pipeline datasets.append(build_dataset(val_dataset)) if cfg.checkpoint_config is not None: # save mmdet version, config file content and class names in # checkpoints as meta data cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + get_git_hash()[:7], CLASSES=datasets[0].CLASSES) # add an attribute for visualization convenience model.CLASSES = datasets[0].CLASSES train_detector(model, datasets, cfg, 
distributed=distributed, validate=(not args.no_validate), timestamp=timestamp, meta=meta)
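# A standalone sketch (the helper name `resolve_gpu_ids` is hypothetical, not
# from the script above) of the GPU-id resolution used there: in
# non-distributed mode only a single GPU is supported, so the deprecated
# `--gpus` / `--gpu-ids` flags collapse to one id, while in distributed mode
# gpu_ids simply becomes range(world_size).
import warnings


def resolve_gpu_ids(gpus, gpu_ids, gpu_id, distributed, world_size=1):
    if distributed:
        return range(world_size)
    if gpus is not None:
        warnings.warn('`--gpus` is deprecated; using a single GPU.')
        return range(1)
    if gpu_ids is not None:
        warnings.warn('`--gpu-ids` is deprecated; using the first id only.')
        return gpu_ids[0:1]
    return [gpu_id]


print(list(resolve_gpu_ids(None, [2, 3], 0, distributed=False)))           # [2]
print(list(resolve_gpu_ids(None, None, 0, distributed=True, world_size=4)))  # [0, 1, 2, 3]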
def test(args, model_dir, distributed): global has_imagenet_reassessed args.work_dir = model_dir model_name = osp.basename(model_dir) orig_top1_acc = 0. orig_top1_acc_reallabels = 0. # remove top1 tag tag = '-top1-' idx = model_name.find(tag) tag1 = '-top1_reallabels-' idx1 = model_name.find(tag1) if idx >= 0: orig_top1_acc = float(model_name[idx + len(tag):idx1 - 1]) / 100. if idx1 > 0: orig_top1_acc_reallabels = float( model_name[idx1 + len(tag1):-2]) / 100. model_name = model_name[:idx] # len_model_name = max(len_model_name, len(model_name)) args.config = os.path.join(model_dir, f'{model_name}.py') if not os.path.exists(args.config): print(f'Not found {args.config}') return None cfg = Config.fromfile(args.config) # work_dir is determined in this priority: CLI > segment in file > filename if args.work_dir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = args.work_dir elif cfg.get('work_dir', None) is None: # use config filename as default work_dir if cfg.work_dir is None cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) if args.gpu_ids is not None: cfg.gpu_ids = args.gpu_ids else: cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) cfg.amp_opt_level = args.amp_opt_level if not has_apex: cfg.amp_opt_level = 'O0' cfg.amp_static_loss_scale = args.amp_static_loss_scale cfg.print_freq = args.print_freq cfg.seed = args.seed # init the logger before other steps timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) log_file = osp.join(cfg.work_dir, 'evalImageNetX.log') logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) # log env info env_info_dict = collect_env() env_info = '\n'.join([('{}: {}'.format(k, v)) for k, v in env_info_dict.items()]) dash_line = '-' * 60 + '\n' logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line) # log cfg logger.info('Distributed training: {}'.format(distributed)) # set random seeds if args.seed is None: args.seed = 23 logger.info('Set random seed to {}, deterministic: {}'.format( args.seed, args.deterministic)) set_random_seed(args.seed, deterministic=args.deterministic) # model model = build_backbone(cfg.model) num_params = sum([m.numel() for m in model.parameters()]) / 1e6 logger.info('Model {} created, param count: {:.3f}M'.format( model_name, num_params)) ckpt_file = os.path.join(model_dir, 'current.pth') load_checkpoint(model, ckpt_file, logger=logger) # ckpt = torch.load(ckpt_file, map_location='cpu') # state_dict = ckpt['model'] # for k in list(state_dict.keys()): # if k.startswith('module.'): # # remove prefix # state_dict[k[len("module."):]] = state_dict[k] # # delete renamed k # del state_dict[k] # model.load_state_dict(state_dict) if not distributed and len(cfg.gpu_ids) > 1: if cfg.amp_opt_level != 'O0': logger.warning( 'AMP does not work well with nn.DataParallel, disabling.' 
+ 'Use distributed mode for multi-GPU AMP.') cfg.amp_opt_level = 'O0' model = nn.DataParallel(model, device_ids=list(cfg.gpu_ids)).cuda() else: model.cuda() # loss criterion_val = torch.nn.CrossEntropyLoss().cuda() # optimizer lr = cfg.optimizer['lr'] lr *= cfg.batch_size * dist.get_world_size() / cfg.autoscale_lr_factor optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.optimizer['momentum'], weight_decay=cfg.optimizer['weight_decay'], nesterov=cfg.optimizer['nesterov']) if cfg.amp_opt_level != 'O0': loss_scale = cfg.amp_static_loss_scale if cfg.amp_static_loss_scale \ else 'dynamic' model, optimizer = amp.initialize(model, optimizer, opt_level=cfg.amp_opt_level, loss_scale=loss_scale, verbosity=1) model = AttnNorm2Float(model) if distributed: if cfg.amp_opt_level != 'O0': model = DDP(model, delay_allreduce=True) else: model = DDP1(model, device_ids=[args.local_rank]) result = [model_name, num_params] for dataset in imagenet_x: # data if dataset == 'imagenet-1k': cfg.data_root = 'data/ILSVRC2015/Data/CLS-LOC' else: cfg.data_root = f'data/{dataset}' if not os.path.exists(cfg.data_root): logger.info(f'not found {cfg.data_root}') continue indices_in_1k = None if dataset in ['imagenet-a', 'imagenet-o']: indices_in_1k = adv_indices_in_1k real_labels = False if dataset == 'imagenet-1k': real_labels_file = os.path.join(cfg.data_root, 'reassessed-imagenet', 'real.json') if os.path.exists(real_labels_file): val_loader = get_val_loader(cfg, cfg.data_cfg['val_cfg'], distributed, real_json=real_labels_file) real_labels = True has_imagenet_reassessed = True else: logger.info( f'not found {cfg.data_root} {real_labels_file} ' + 'consider to download real labels at ' + 'https://github.com/google-research/reassessed-imagenet') val_loader = get_val_loader(cfg, cfg.data_cfg['val_cfg'], distributed) else: val_loader = get_val_loader(cfg, cfg.data_cfg['val_cfg'], distributed) # eval results = validate(val_loader, model, criterion_val, cfg, logger, distributed, indices_in_1k=indices_in_1k, real_labels=real_labels) result.append((round(results[0], 3), round(results[1], 3))) logger.info( f'** {model_name} - {dataset} top1-acc {results[0]:.3%}, top5-acc {results[1]:.3%}' ) if len(results) == 4: result.append((round(results[2], 3), round(results[3], 3))) logger.info( f'** {model_name} - {dataset} top1-acc_reallabels {results[2]:.3%}, top5-acc_reallabels {results[3]:.3%}' ) return result
def main( config=None, fold=None, work_dir=None, resume_from=None, load_from=None, no_validate=False, gpus=None, gpu_ids=None, seed=None, deterministic=False, options=None, launcher="none", local_rank=0, ): cfg = Config.fromfile(config) if fold is not None: if "ann_file" in cfg.data.train: if isinstance(cfg.data.train.ann_file, list): cfg.data.train.ann_file = [ x.format(fold=fold) for x in cfg.data.train.ann_file ] elif isinstance(cfg.data.train.ann_file, str): cfg.data.train.ann_file = cfg.data.train.ann_file.format( fold=fold) else: cfg.data.train.dataset.ann_file = cfg.data.train.dataset.ann_file.format( fold=fold) cfg.data.val.ann_file = cfg.data.val.ann_file.format(fold=fold) cfg.data.test.ann_file = cfg.data.test.ann_file.format(fold=fold) if options is not None: cfg.merge_from_dict(options) # set cudnn_benchmark if cfg.get("cudnn_benchmark", False): torch.backends.cudnn.benchmark = True # work_dir is determined in this priority: CLI > segment in file > filename if work_dir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = work_dir elif cfg.get("work_dir", None) is None: # use config filename as default work_dir if cfg.work_dir is None cfg.work_dir = osp.join("/dumps/work_dirs", osp.splitext(osp.basename(config))[0], str(fold)) if resume_from is not None: cfg.resume_from = resume_from if load_from is not None: cfg.load_from = load_from if gpu_ids is not None: cfg.gpu_ids = gpu_ids else: cfg.gpu_ids = range(1) if gpus is None else range(gpus) # init distributed env first, since logger depends on the dist info. if launcher == "none": distributed = False else: distributed = True init_dist(launcher, **cfg.dist_params) # create work_dir mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) # init the logger before other steps timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime()) log_file = osp.join(cfg.work_dir, f"{timestamp}.log") logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) # init the meta dict to record some important information such as # environment info and seed, which will be logged meta = dict() # log env info env_info_dict = collect_env() env_info = "\n".join([f"{k}: {v}" for k, v in env_info_dict.items()]) dash_line = "-" * 60 + "\n" logger.info("Environment info:\n" + dash_line + env_info + "\n" + dash_line) meta["env_info"] = env_info # log some basic info logger.info(f"Distributed training: {distributed}") logger.info(f"Config:\n{cfg.pretty_text}") # set random seeds if seed is not None: logger.info(f"Set random seed to {seed}, " f"deterministic: {deterministic}") set_random_seed(seed, deterministic=deterministic) cfg.seed = seed meta["seed"] = seed model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) datasets = [build_dataset(cfg.data.train)] if len(cfg.workflow) == 2: val_dataset = copy.deepcopy(cfg.data.val) val_dataset.pipeline = cfg.data.train.pipeline datasets.append(build_dataset(val_dataset)) if cfg.checkpoint_config is not None: # save mmdet version, config file content and class names in # checkpoints as meta data cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.pretty_text, CLASSES=datasets[0].CLASSES) # add an attribute for visualization convenience model.CLASSES = datasets[0].CLASSES train_detector(model, datasets, cfg, distributed=distributed, validate=(not no_validate), timestamp=timestamp, meta=meta)
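# A small sketch (standalone helper; the paths below are placeholders, not the
# annotation files used above) of the fold handling in the function above:
# annotation paths contain a `{fold}` placeholder that is filled in per
# cross-validation fold, covering both str- and list-valued ann_file fields.
def format_fold(ann_file, fold):
    if isinstance(ann_file, list):
        return [x.format(fold=fold) for x in ann_file]
    return ann_file.format(fold=fold)


print(format_fold('annotations/train_fold{fold}.json', 0))
# annotations/train_fold0.json
print(format_fold(['a_fold{fold}.json', 'b_fold{fold}.json'], 3))
# ['a_fold3.json', 'b_fold3.json']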
def main(): args = parse_args() cfg = Config.fromfile(args.config) # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True # update configs according to CLI args if args.work_dir is not None: cfg.work_dir = args.work_dir if args.resume_from is not None: cfg.resume_from = args.resume_from if args.gpu_ids is not None: cfg.gpu_ids = args.gpu_ids else: cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) if args.autoscale_lr: # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 # init distributed env first, since logger depends on the dist info. if args.launcher == 'none': distributed = False else: distributed = True init_dist(args.launcher, **cfg.dist_params) # create work_dir mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) # init the logger before other steps timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp)) logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) logger.info(pprint.pformat(edict(cfg))) # init the meta dict to record some important information such as # environment info and seed, which will be logged meta = dict() # log env info env_info_dict = collect_env() env_info = '\n'.join([('{}: {}'.format(k, v)) for k, v in env_info_dict.items()]) dash_line = '-' * 60 + '\n' logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line) meta['env_info'] = env_info # log some basic info logger.info('Distributed training: {}'.format(distributed)) logger.info('Config:\n{}'.format(cfg.text)) # set random seeds if args.seed is not None: logger.info('Set random seed to {}, deterministic: {}'.format( args.seed, args.deterministic)) set_random_seed(args.seed, deterministic=args.deterministic) cfg.seed = args.seed meta['seed'] = args.seed model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) datasets = [build_dataset(cfg.data.train)] if len(cfg.workflow) == 2: val_dataset = copy.deepcopy(cfg.data.val) val_dataset.pipeline = cfg.data.train.pipeline datasets.append(build_dataset(val_dataset)) if cfg.checkpoint_config is not None: # save mmdet version, config file content and class names in # checkpoints as meta data cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.text, CLASSES=datasets[0].CLASSES) # add an attribute for visualization convenience model.CLASSES = datasets[0].CLASSES train_detector(model, datasets, cfg, distributed=distributed, validate=args.validate, timestamp=timestamp, meta=meta)
def main(): args = parse_args() cfg = Config.fromfile(args.config) # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True # work_dir is determined in this priority: CLI > segment in file > filename if args.work_dir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = args.work_dir elif cfg.get('work_dir', None) is None: # use config filename as default work_dir if cfg.work_dir is None cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) if args.resume_from is not None: cfg.resume_from = args.resume_from if args.start_epoch is not None: cfg.start_epoch = args.start_epoch if args.gpu_ids is not None: cfg.gpu_ids = args.gpu_ids else: cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) cfg.amp_opt_level = args.amp_opt_level if not has_apex: cfg.amp_opt_level = 'O0' cfg.amp_static_loss_scale = args.amp_static_loss_scale cfg.eval = args.eval if cfg.eval: assert os.path.isfile(cfg.load_from) cfg.debug = args.debug cfg.print_freq = args.print_freq if not cfg.debug else 10 cfg.save_freq = args.save_freq cfg.profiling = args.profiling if args.seed is None: args.seed = 23 # init distributed env first, since logger depends on the dist info. if args.launcher == 'none': distributed = False else: distributed = True init_dist(args.launcher, **cfg.dist_params) # create work_dir mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) # dump config cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) # init the logger before other steps timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp)) logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) # log env info env_info_dict = collect_env() env_info = '\n'.join([('{}: {}'.format(k, v)) for k, v in env_info_dict.items()]) dash_line = '-' * 60 + '\n' logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line) # log cfg logger.info('Distributed training: {}'.format(distributed)) logger.info('Config:\n{}'.format(cfg.text)) # set random seeds if args.seed is not None: logger.info('Set random seed to {}, deterministic: {}'.format( args.seed, args.deterministic)) set_random_seed(args.seed, deterministic=args.deterministic) cfg.seed = args.seed # model model = build_backbone(cfg.model) logger.info('Model {} created, param count: {:.3f}M'.format( cfg.model['type'], sum([m.numel() for m in model.parameters()]) / 1e6)) if cfg.debug and dist.get_rank() == 0: print(model) if cfg.eval: load_pretrained(cfg, model, logger) if not distributed and len(cfg.gpu_ids) > 1: if cfg.amp_opt_level != 'O0': logger.warning( 'AMP does not work well with nn.DataParallel, disabling.' 
+ 'Use distributed mode for multi-GPU AMP.') cfg.amp_opt_level = 'O0' model = nn.DataParallel(model, device_ids=list(cfg.gpu_ids)).cuda() else: model.cuda() # data fast_collate_mixup = None if hasattr(cfg.data_cfg['train_cfg'], 'mix_up_rate') and \ cfg.data_cfg['train_cfg']['mix_up_rate'] > 0.: fast_collate_mixup = FastCollateMixup( cfg.data_cfg['train_cfg']['mix_up_rate'], cfg.data_cfg['train_cfg']['label_smoothing_rate'], cfg.data_cfg['train_cfg']['num_classes'] ) train_loader = get_train_loader( cfg, cfg.data_cfg['train_cfg'], distributed, fast_collate_mixup=fast_collate_mixup) real_labels_file = os.path.join( cfg.data_root, 'reassessed-imagenet', 'real.json') if os.path.exists(real_labels_file): val_loader = get_val_loader(cfg, cfg.data_cfg['val_cfg'], distributed, real_json=real_labels_file) real_labels = True else: logger.info(f'not found {cfg.data_root} {real_labels_file} ' + 'consider to download real labels at ' + 'https://github.com/google-research/reassessed-imagenet') val_loader = get_val_loader(cfg, cfg.data_cfg['val_cfg'], distributed) real_labels = False # loss if hasattr(cfg.data_cfg['train_cfg'], 'mix_up_rate') and \ cfg.data_cfg['train_cfg']['mix_up_rate'] > 0.: criterion_train = SoftTargetCrossEntropy().cuda() criterion_val = torch.nn.CrossEntropyLoss().cuda() elif hasattr(cfg.data_cfg['train_cfg'], 'label_smoothing_rate') and \ cfg.data_cfg['train_cfg']['label_smoothing_rate'] > 0.: criterion_train = LabelSmoothingCrossEntropy( cfg.data_cfg['train_cfg']['label_smoothing_rate'] ).cuda() criterion_val = torch.nn.CrossEntropyLoss().cuda() else: criterion_train = torch.nn.CrossEntropyLoss().cuda() criterion_val = criterion_train # optimizer lr = cfg.optimizer['lr'] lr *= cfg.batch_size * dist.get_world_size() / cfg.autoscale_lr_factor if hasattr(cfg.optimizer, 'remove_norm_weigth_decay') and \ cfg.optimizer['remove_norm_weigth_decay']: norm_params, base_params = separate_norm_params(model) optimizer = torch.optim.SGD([ {'params': base_params, 'weight_decay': cfg.optimizer['weight_decay']}, {'params': norm_params, 'weight_decay': 0.0}], lr=lr, momentum=cfg.optimizer['momentum'], nesterov=cfg.optimizer['nesterov']) else: optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.optimizer['momentum'], weight_decay=cfg.optimizer['weight_decay'], nesterov=cfg.optimizer['nesterov']) if cfg.amp_opt_level != 'O0': loss_scale = cfg.amp_static_loss_scale if cfg.amp_static_loss_scale \ else 'dynamic' model, optimizer = amp.initialize(model, optimizer, opt_level=cfg.amp_opt_level, loss_scale=loss_scale, verbosity=1) model = AttnNorm2Float(model) if distributed: if cfg.amp_opt_level != 'O0': model = DDP(model, delay_allreduce=True) else: model = DDP1(model, device_ids=[args.local_rank]) if cfg.profiling: x = torch.randn((2, 3, 224, 224), requires_grad=True).cuda() with torch.autograd.profiler.profile(use_cuda=True) as prof: model(x) prof.export_chrome_trace(os.path.join(cfg.work_dir, 'profiling.log')) logger.info(f"{prof}") return # scheduler scheduler = get_scheduler(optimizer, len(train_loader), cfg) # eval if cfg.eval: validate(val_loader, model, criterion_val, cfg, logger, distributed, real_labels=real_labels) return # optionally resume from a checkpoint if cfg.resume_from: assert os.path.isfile(cfg.resume_from) load_checkpoint(cfg, model, optimizer, scheduler, logger) # training for epoch in range(cfg.start_epoch, cfg.total_epochs + 1): if isinstance(train_loader.sampler, DistributedSampler): train_loader.sampler.set_epoch(epoch) tic = time.time() train(epoch, 
train_loader, model, criterion_train, optimizer, scheduler, cfg, logger, distributed) used_time = time.time() - tic remaining_time = (cfg.total_epochs - epoch) * used_time / 3600 logger.info( f'epoch {epoch}, total time {used_time:.2f} sec, estimated remaining time {remaining_time:.2f} hr') if real_labels is not None: test_acc, is_best, _, is_best_reallabels = validate( val_loader, model, criterion_val, cfg, logger, distributed, real_labels=real_labels) if dist.get_rank() == 0 and (epoch % cfg.save_freq == 0 or is_best or is_best_reallabels): save_checkpoint(cfg, epoch, model, optimizer, best_acc1, best_acc1_reallabels, scheduler, logger, is_best, is_best_reallabels) else: test_acc, is_best = validate( val_loader, model, criterion_val, cfg, logger, distributed) if dist.get_rank() == 0 and (epoch % cfg.save_freq == 0 or is_best): save_checkpoint(cfg, epoch, model, optimizer, best_acc1, best_acc1_reallabels, scheduler, logger, is_best, False) if cfg.debug: break # rename folder if dist.get_rank() == 0: os.rename(cfg.work_dir, cfg.work_dir+f'-top1-{best_acc1:.2%}')
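# A self-contained sketch (not the original training loop; helper names are
# hypothetical) of the two pieces of bookkeeping above: estimating remaining
# training time from the last epoch's duration, and renaming the work_dir to
# embed the best top-1 accuracy.
def remaining_hours(epoch, total_epochs, last_epoch_seconds):
    """Estimate remaining time in hours from the most recent epoch duration."""
    return (total_epochs - epoch) * last_epoch_seconds / 3600


def tag_work_dir_with_top1(work_dir, best_acc1):
    """Return the renamed work_dir path, e.g. '.../exp-top1-76.52%'."""
    return work_dir + f'-top1-{best_acc1:.2%}'


print(remaining_hours(epoch=10, total_epochs=100, last_epoch_seconds=1800))  # 45.0
print(tag_work_dir_with_top1('./work_dirs/exp', 0.7652))  # ./work_dirs/exp-top1-76.52%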