def main():
    """Entry point: build config from CLI args, then train (or test) a reid model.

    Side effects: seeds RNGs, redirects ``sys.stdout`` to a timestamped log
    file under ``cfg.data.save_dir``, and may enable cudnn benchmarking.
    """
    # 1. Get input arguments
    args = get_args()
    # 2. Create config instance from args above
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    # Command-line 'opts' override values from the config file.
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)
    # Log file name reflects the mode (test vs. train) plus a timestamp.
    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    # All subsequent print() output is mirrored into the log file.
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True
    # 3. Create DataManager Instance
    datamanager = build_datamanager(cfg)
    print('Building model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(
        name=cfg.model.name,
        num_classes=datamanager.num_train_pids,
        loss=cfg.loss.name,
        pretrained=cfg.model.pretrained,
        use_gpu=cfg.use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, cfg.data.height, cfg.data.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    # Optionally initialize from pretrained weights (ignored if path invalid).
    if cfg.model.load_weights and check_isfile(cfg.model.load_weights):
        load_pretrained_weights(model, cfg.model.load_weights)
    if cfg.use_gpu:
        model = nn.DataParallel(model).cuda()
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer, **lr_scheduler_kwargs(cfg))
    # Resume restores model, optimizer and scheduler state and returns the
    # epoch to continue from.
    if cfg.model.resume and check_isfile(cfg.model.resume):
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume, model, optimizer=optimizer, scheduler=scheduler)
    print('Building {}-engine for {}-reid'.format(cfg.loss.name, cfg.data.type))
    # Build engine and run
    engine = build_engine(cfg, datamanager, model, optimizer, scheduler)
    engine.run(**engine_run_kwargs(cfg))
def main():
    """Entry point driven by the module-level ``args`` namespace.

    Seeds RNGs, optionally restricts visible GPUs, redirects ``sys.stdout``
    to a timestamped log file, builds datamanager/model/optimizer/scheduler,
    and runs the training/test engine.
    """
    global args
    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        # Restrict visible GPUs before any CUDA work happens.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    log_name = 'test.log' if args.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('** Arguments **')
    arg_keys = list(args.__dict__.keys())
    arg_keys.sort()
    for key in arg_keys:
        print('{}: {}'.format(key, args.__dict__[key]))
    print('\n')
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if use_gpu:
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')
    datamanager = build_datamanager(args)
    print('Building model: {}'.format(args.arch))
    model = torchreid.models.build_model(
        name=args.arch,
        num_classes=datamanager.num_train_pids,
        loss=args.loss.lower(),
        pretrained=(not args.no_pretrained),
        use_gpu=use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, args.height, args.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)
    if use_gpu:
        model = nn.DataParallel(model).cuda()
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(args))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))
    if args.resume and check_isfile(args.resume):
        # Fix: also restore the LR scheduler state on resume, consistent with
        # the sibling entry points in this file. Previously only model and
        # optimizer were restored, so the LR schedule silently restarted.
        args.start_epoch = resume_from_checkpoint(
            args.resume, model, optimizer=optimizer, scheduler=scheduler)
    print('Building {}-engine for {}-reid'.format(args.loss, args.app))
    engine = build_engine(args, datamanager, model, optimizer, scheduler)
    engine.run(**engine_run_kwargs(args))
def main():
    """Entry point: parse CLI args, assemble the config, and run the engine.

    Side effects: seeds RNGs, redirects ``sys.stdout`` to a timestamped log
    file under ``cfg.data.save_dir``.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config-file', type=str, default='', help='path to config file')
    parser.add_argument('-s', '--sources', type=str, nargs='+', help='source datasets (delimited by space)')
    parser.add_argument('-t', '--targets', type=str, nargs='+', help='target datasets (delimited by space)')
    parser.add_argument('--root', type=str, default='', help='path to data root')
    parser.add_argument('opts', default=None, nargs=argparse.REMAINDER, help='Modify config options using the command-line')
    args = parser.parse_args()
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    # Trailing 'opts' on the command line override config-file values.
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)
    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True
    datamanager = build_datamanager(cfg)
    print('Building model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(**model_kwargs(cfg, datamanager.num_train_pids))
    num_params, flops = compute_model_complexity(model, (1, 3, cfg.data.height, cfg.data.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    if cfg.model.load_weights and check_isfile(cfg.model.load_weights):
        if cfg.model.pretrained and not cfg.test.evaluate:
            # Training from ImageNet-style pretrained weights: load the raw
            # state dict through the model's own loader.
            state_dict = torch.load(cfg.model.load_weights)
            model.load_pretrained_weights(state_dict)
        else:
            load_pretrained_weights(model, cfg.model.load_weights)
    if cfg.use_gpu:
        model = nn.DataParallel(model).cuda()
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer,
                                                   **lr_scheduler_kwargs(cfg))
    # Resume restores model/optimizer/scheduler and the epoch to continue from.
    if cfg.model.resume and check_isfile(cfg.model.resume):
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume, model, optimizer=optimizer, scheduler=scheduler
        )
    print('Building {}-engine for {}-reid'.format(cfg.loss.name, cfg.data.type))
    engine = build_engine(cfg, datamanager, model, optimizer, scheduler)
    engine.run(**engine_run_kwargs(cfg))
def run_lr_finder(cfg, datamanager, model, optimizer, scheduler, classes,
                  rebuild_model=True, gpu_num=1, split_models=False):
    """Run the LR finder, then rebuild the training state with the found LR.

    Returns ``(lr, model, optimizer, scheduler)`` ready for the real training
    run. If ``cfg.lr_finder.stop_after`` is set, the process exits instead.

    Note: mutates ``cfg`` in place (``cfg.train.lr`` is set to the estimated
    LR and ``cfg.lr_finder.enable`` is switched off).
    """
    if not rebuild_model:
        # Keep an untouched copy: the LR-finder run below mutates `model`.
        backup_model = deepcopy(model)
    engine = build_engine(cfg, datamanager, model, optimizer, scheduler, initial_lr=cfg.train.lr)
    lr_finder = LrFinder(engine=engine, **lr_finder_run_kwargs(cfg))
    aux_lr = lr_finder.process()
    print(f"Estimated learning rate: {aux_lr}")
    if cfg.lr_finder.stop_after:
        print("Finding learning rate finished. Terminate the training process")
        sys.exit(0)
    # reload all parts of the training
    # we do not check classification parameters
    # and do not get num_train_classes the second time
    # since it's done above and lr finder cannot change parameters of the datasets
    cfg.train.lr = aux_lr
    cfg.lr_finder.enable = False
    set_random_seed(cfg.train.seed, cfg.train.deterministic)
    datamanager = build_datamanager(cfg, classes)
    num_train_classes = datamanager.num_train_pids
    if rebuild_model:
        # Fresh model instead of the (mutated) one used by the finder.
        backup_model = torchreid.models.build_model(
            **model_kwargs(cfg, num_train_classes))
        num_aux_models = len(cfg.mutual_learning.aux_configs)
        backup_model, _ = put_main_model_on_the_device(
            backup_model, cfg.use_gpu, gpu_num, num_aux_models, split_models)
    optimizer = torchreid.optim.build_optimizer(backup_model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer=optimizer, num_iter=datamanager.num_iter, **lr_scheduler_kwargs(cfg))
    return cfg.train.lr, backup_model, optimizer, scheduler
def main():
    """Entry point driven by the module-level ``args`` namespace.

    Seeds RNGs, optionally restricts visible GPUs, redirects ``sys.stdout``
    to a log file, builds datamanager/model/optimizer/scheduler, and runs
    the training/test engine.
    """
    global args
    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        # Restrict visible GPUs before any CUDA work happens.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = (torch.cuda.is_available() and not args.use_cpu)
    log_name = 'test.log' if args.evaluate else 'train.log'
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('==========\nArgs:{}\n=========='.format(args))
    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')
    datamanager = build_datamanager(args)
    print('Building model: {}'.format(args.arch))
    model = torchreid.models.build_model(
        name=args.arch,
        num_classes=datamanager.num_train_pids,
        loss=args.loss.lower(),
        pretrained=(not args.no_pretrained),
        use_gpu=use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, args.height, args.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)
    if use_gpu:
        model = nn.DataParallel(model).cuda()
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(args))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))
    if args.resume and check_isfile(args.resume):
        # Fix: also restore the LR scheduler state on resume, consistent with
        # the sibling entry points in this file. Previously only model and
        # optimizer were restored, so the LR schedule silently restarted.
        args.start_epoch = resume_from_checkpoint(
            args.resume, model, optimizer=optimizer, scheduler=scheduler)
    print('Building {}-engine for {}-reid'.format(args.loss, args.app))
    engine = build_engine(args, datamanager, model, optimizer, scheduler)
    engine.run(**engine_run_kwargs(args))
def main():
    """Entry point for the NAS (architecture-search) training script.

    Builds an OSNet search model, runs ``ImageSoftmaxNASEngine``, and prints
    the discovered child architecture afterwards.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '--config-file', type=str, default='', help='path to config file'
    )
    parser.add_argument(
        '-s',
        '--sources',
        type=str,
        nargs='+',
        help='source datasets (delimited by space)'
    )
    parser.add_argument(
        '-t',
        '--targets',
        type=str,
        nargs='+',
        help='target datasets (delimited by space)'
    )
    parser.add_argument(
        '--transforms', type=str, nargs='+', help='data augmentation'
    )
    parser.add_argument(
        '--root', type=str, default='', help='path to data root'
    )
    parser.add_argument(
        '--gpu-devices',
        type=str,
        default='',
    )
    parser.add_argument(
        'opts',
        default=None,
        nargs=argparse.REMAINDER,
        help='Modify config options using the command-line'
    )
    args = parser.parse_args()
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)
    if cfg.use_gpu and args.gpu_devices:
        # if gpu_devices is not specified, all available gpus will be used
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True
    datamanager = torchreid.data.ImageDataManager(**imagedata_kwargs(cfg))
    print('Building model: {}'.format(cfg.model.name))
    # NAS uses the dedicated osnet_models builder, not torchreid.models.
    model = osnet_models.build_model(
        cfg.model.name, num_classes=datamanager.num_train_pids
    )
    num_params, flops = compute_model_complexity(
        model, (1, 3, cfg.data.height, cfg.data.width)
    )
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    if cfg.use_gpu:
        model = nn.DataParallel(model).cuda()
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer, **lr_scheduler_kwargs(cfg)
    )
    if cfg.model.resume and check_isfile(cfg.model.resume):
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume, model, optimizer=optimizer
        )
    print('Building NAS engine')
    # lmda_* parameters control the Gumbel-softmax temperature schedule.
    engine = ImageSoftmaxNASEngine(
        datamanager,
        model,
        optimizer,
        scheduler=scheduler,
        use_gpu=cfg.use_gpu,
        label_smooth=cfg.loss.softmax.label_smooth,
        mc_iter=cfg.nas.mc_iter,
        init_lmda=cfg.nas.init_lmda,
        min_lmda=cfg.nas.min_lmda,
        lmda_decay_step=cfg.nas.lmda_decay_step,
        lmda_decay_rate=cfg.nas.lmda_decay_rate,
        fixed_lmda=cfg.nas.fixed_lmda
    )
    engine.run(**engine_run_kwargs(cfg))
    print('*** Display the found architecture ***')
    # Under DataParallel the real model lives in model.module.
    if cfg.use_gpu:
        model.module.build_child_graph()
    else:
        model.build_child_graph()
def main():
    """Entry point for Deep Mutual Learning: trains two peer models jointly.

    Model-2 starts as a deep copy of model-1; each model gets its own
    optionally-loaded weights, optimizer and scheduler, and both are driven
    by ``ImageDMLEngine``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config-file', type=str, default='', help='path to config file')
    parser.add_argument('-s', '--sources', type=str, nargs='+', help='source datasets (delimited by space)')
    parser.add_argument('-t', '--targets', type=str, nargs='+', help='target datasets (delimited by space)')
    parser.add_argument('--transforms', type=str, nargs='+', help='data augmentation')
    parser.add_argument('--root', type=str, default='', help='path to data root')
    parser.add_argument('opts', default=None, nargs=argparse.REMAINDER, help='Modify config options using the command-line')
    args = parser.parse_args()
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)
    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True
    datamanager = torchreid.data.ImageDataManager(**imagedata_kwargs(cfg))
    print('Building model-1: {}'.format(cfg.model.name))
    model1 = torchreid.models.build_model(
        name=cfg.model.name,
        num_classes=datamanager.num_train_pids,
        loss=cfg.loss.name,
        pretrained=cfg.model.pretrained,
        use_gpu=cfg.use_gpu)
    num_params, flops = compute_model_complexity(
        model1, (1, 3, cfg.data.height, cfg.data.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    print('Copying model-1 to model-2')
    # Both peers share the same architecture; weights may diverge below.
    model2 = copy.deepcopy(model1)
    if cfg.model.load_weights1 and check_isfile(cfg.model.load_weights1):
        load_pretrained_weights(model1, cfg.model.load_weights1)
    if cfg.model.load_weights2 and check_isfile(cfg.model.load_weights2):
        load_pretrained_weights(model2, cfg.model.load_weights2)
    if cfg.use_gpu:
        model1 = nn.DataParallel(model1).cuda()
        model2 = nn.DataParallel(model2).cuda()
    # Independent optimizer/scheduler pair per peer model.
    optimizer1 = torchreid.optim.build_optimizer(model1, **optimizer_kwargs(cfg))
    scheduler1 = torchreid.optim.build_lr_scheduler(optimizer1, **lr_scheduler_kwargs(cfg))
    optimizer2 = torchreid.optim.build_optimizer(model2, **optimizer_kwargs(cfg))
    scheduler2 = torchreid.optim.build_lr_scheduler(optimizer2, **lr_scheduler_kwargs(cfg))
    # Only resume1 defines the start epoch; resume2 just restores state.
    if cfg.model.resume1 and check_isfile(cfg.model.resume1):
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume1, model1, optimizer=optimizer1, scheduler=scheduler1)
    if cfg.model.resume2 and check_isfile(cfg.model.resume2):
        resume_from_checkpoint(
            cfg.model.resume2, model2, optimizer=optimizer2, scheduler=scheduler2)
    print('Building DML-engine for image-reid')
    engine = ImageDMLEngine(
        datamanager,
        model1,
        optimizer1,
        scheduler1,
        model2,
        optimizer2,
        scheduler2,
        margin=cfg.loss.triplet.margin,
        weight_t=cfg.loss.triplet.weight_t,
        weight_x=cfg.loss.triplet.weight_x,
        weight_ml=cfg.loss.dml.weight_ml,
        use_gpu=cfg.use_gpu,
        label_smooth=cfg.loss.softmax.label_smooth,
        deploy=cfg.model.deploy)
    engine.run(**engine_run_kwargs(cfg))
def main():
    """Entry point for training with optional NNCF compression (quantization
    and/or pruning) and optional mutual learning with auxiliary models.

    Order matters here: NNCF config changes happen before seeding/logging,
    NNCF model changes happen before the optimizer is built, and checkpoint
    loading is skipped entirely when NNCF is active (NNCF loads internally).
    """
    parser = build_base_argparser()
    parser.add_argument('-e', '--auxiliary-models-cfg', type=str, nargs='*', default='',
                        help='path to extra config files')
    parser.add_argument('--split-models', action='store_true',
                        help='whether to split models on own gpu')
    parser.add_argument('--enable_quantization', action='store_true',
                        help='Enable NNCF quantization algorithm')
    parser.add_argument('--enable_pruning', action='store_true',
                        help='Enable NNCF pruning algorithm')
    parser.add_argument('--aux-config-opts', nargs='+', default=None,
                        help='Modify aux config options using the command-line')
    args = parser.parse_args()
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available() and args.gpu_num > 0
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    is_nncf_used = args.enable_quantization or args.enable_pruning
    if is_nncf_used:
        print(f'Using NNCF -- making NNCF changes in config')
        cfg = make_nncf_changes_in_config(cfg, args.enable_quantization,
                                          args.enable_pruning, args.opts)
    set_random_seed(cfg.train.seed, cfg.train.deterministic)
    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    print('Show configuration\n{}\n'.format(cfg))
    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True
    num_aux_models = len(cfg.mutual_learning.aux_configs)
    datamanager = build_datamanager(cfg, args.classes)
    num_train_classes = datamanager.num_train_pids
    print('Building main model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(**model_kwargs(cfg, num_train_classes))
    macs, num_params = get_model_complexity_info(
        model, (3, cfg.data.height, cfg.data.width),
        as_strings=False, verbose=False, print_per_layer_stat=False)
    # get_model_complexity_info reports MACs; flops are reported as 2*MACs.
    print('Main model complexity: params={:,} flops={:,}'.format(num_params, macs * 2))
    aux_lr = cfg.train.lr  # placeholder, needed for aux models, may be filled by nncf part below
    if is_nncf_used:
        print('Begin making NNCF changes in model')
        if cfg.use_gpu:
            model.cuda()
        compression_ctrl, model, cfg, aux_lr, nncf_metainfo = \
            make_nncf_changes_in_training(model, cfg, args.classes, args.opts)
        should_freeze_aux_models = True
        print(f'should_freeze_aux_models = {should_freeze_aux_models}')
        print('End making NNCF changes in model')
    else:
        compression_ctrl = None
        should_freeze_aux_models = False
        nncf_metainfo = None
    # creating optimizer and scheduler -- it should be done after NNCF part, since
    # NNCF could change some parameters
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    if cfg.lr_finder.enable and not cfg.model.resume:
        # The scheduler is rebuilt later by run_lr_finder with the found LR.
        scheduler = None
    else:
        scheduler = torchreid.optim.build_lr_scheduler(
            optimizer=optimizer, num_iter=datamanager.num_iter,
            **lr_scheduler_kwargs(cfg))
    # Loading model (and optimizer and scheduler in case of resuming training).
    # Note that if NNCF is used, loading is done inside NNCF part, so loading here is not required.
    if cfg.model.resume and check_isfile(cfg.model.resume) and not is_nncf_used:
        device_ = 'cuda' if cfg.use_gpu else 'cpu'
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume, model, optimizer=optimizer,
            scheduler=scheduler, device=device_)
    elif cfg.model.load_weights and not is_nncf_used:
        load_pretrained_weights(model, cfg.model.load_weights)
    if cfg.model.type == 'classification':
        check_classification_classes(model, datamanager, args.classes,
                                     test_only=cfg.test.evaluate)
    model, extra_device_ids = put_main_model_on_the_device(
        model, cfg.use_gpu, args.gpu_num, num_aux_models, args.split_models)
    # LR finder only runs for fresh trainings (not eval, not resume).
    if cfg.lr_finder.enable and not cfg.test.evaluate and not cfg.model.resume:
        aux_lr, model, optimizer, scheduler = run_lr_finder(
            cfg, datamanager, model, optimizer, scheduler, args.classes,
            rebuild_model=True, gpu_num=args.gpu_num,
            split_models=args.split_models)
    log_dir = cfg.data.tb_log_dir if cfg.data.tb_log_dir else cfg.data.save_dir
    run_training(cfg, datamanager, model, optimizer, scheduler,
                 extra_device_ids, aux_lr,
                 tb_writer=SummaryWriter(log_dir=log_dir),
                 should_freeze_aux_models=should_freeze_aux_models,
                 nncf_metainfo=nncf_metainfo,
                 compression_ctrl=compression_ctrl)
def train(self, dataset: DatasetEntity, output_model: ModelEntity,
          train_parameters: Optional[TrainParameters] = None):
    """Trains a model on a dataset.

    Trains a deep copy of the task's current model; on success the best
    snapshot from the scratch space is loaded back into ``self._model`` and
    the resulting model plus performance metrics are written to
    ``output_model``. Returns early (saving nothing) if training was
    cancelled via the stop callback.
    """
    # Train a copy so the task's live model is only replaced by the best
    # snapshot at the end.
    train_model = deepcopy(self._model)
    if train_parameters is not None:
        update_progress_callback = train_parameters.update_progress
    else:
        update_progress_callback = default_progress_callback
    time_monitor = TrainingProgressCallback(
        update_progress_callback,
        num_epoch=self._cfg.train.max_epoch,
        num_train_steps=math.ceil(
            len(dataset.get_subset(Subset.TRAINING)) / self._cfg.train.batch_size),
        num_val_steps=0,
        num_test_steps=0)
    self.metrics_monitor = DefaultMetricsMonitor()
    self.stop_callback.reset()
    set_random_seed(self._cfg.train.seed)
    train_subset = dataset.get_subset(Subset.TRAINING)
    val_subset = dataset.get_subset(Subset.VALIDATION)
    # Wrap OTE subsets as custom dataset roots so ImageDataManager can
    # consume them directly.
    self._cfg.custom_datasets.roots = [
        OTEClassificationDataset(train_subset, self._labels, self._multilabel,
                                 keep_empty_label=self._empty_label in self._labels),
        OTEClassificationDataset(val_subset, self._labels, self._multilabel,
                                 keep_empty_label=self._empty_label in self._labels)
    ]
    datamanager = torchreid.data.ImageDataManager(
        **imagedata_kwargs(self._cfg))
    num_aux_models = len(self._cfg.mutual_learning.aux_configs)
    if self._cfg.use_gpu:
        main_device_ids = list(range(self.num_devices))
        # Each aux model gets the same set of device ids as the main model.
        extra_device_ids = [main_device_ids for _ in range(num_aux_models)]
        train_model = DataParallel(train_model, device_ids=main_device_ids,
                                   output_device=0).cuda(main_device_ids[0])
    else:
        extra_device_ids = [None for _ in range(num_aux_models)]
    optimizer = torchreid.optim.build_optimizer(
        train_model, **optimizer_kwargs(self._cfg))
    if self._cfg.lr_finder.enable:
        # run_lr_finder below builds the scheduler after picking the LR.
        scheduler = None
    else:
        scheduler = torchreid.optim.build_lr_scheduler(
            optimizer, num_iter=datamanager.num_iter,
            **lr_scheduler_kwargs(self._cfg))
    if self._cfg.lr_finder.enable:
        _, train_model, optimizer, scheduler = \
            run_lr_finder(self._cfg, datamanager, train_model, optimizer,
                          scheduler, None, rebuild_model=False,
                          gpu_num=self.num_devices,
                          split_models=False)
    _, final_acc = run_training(self._cfg, datamanager, train_model, optimizer,
                                scheduler, extra_device_ids,
                                self._cfg.train.lr,
                                tb_writer=self.metrics_monitor,
                                perf_monitor=time_monitor,
                                stop_callback=self.stop_callback)
    training_metrics = self._generate_training_metrics_group()
    self.metrics_monitor.close()
    if self.stop_callback.check_stop():
        logger.info('Training cancelled.')
        return
    logger.info("Training finished.")
    # Load the best checkpoint produced during training back into the
    # task's live model.
    best_snap_path = os.path.join(self._scratch_space, 'best.pth')
    if os.path.isfile(best_snap_path):
        load_pretrained_weights(self._model, best_snap_path)
    # Pick up the best snapshot of each auxiliary model as well.
    for filename in os.listdir(self._scratch_space):
        match = re.match(r'best_(aux_model_[0-9]+\.pth)', filename)
        if match:
            aux_model_name = match.group(1)
            best_aux_snap_path = os.path.join(self._scratch_space, filename)
            self._aux_model_snap_paths[aux_model_name] = best_aux_snap_path
    self.save_model(output_model)
    performance = Performance(score=ScoreMetric(value=final_acc, name="accuracy"),
                              dashboard_metrics=training_metrics)
    logger.info(f'FINAL MODEL PERFORMANCE {performance}')
    output_model.performance = performance
def main():
    """Entry point for evaluation only.

    Supports three model sources: a plain PyTorch checkpoint, an
    NNCF-compressed checkpoint (detected via its compression hyperparams),
    or an OpenVINO IR (``.xml``) executed through the Inference Engine.
    """
    parser = build_base_argparser()
    args = parser.parse_args()
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available() and args.gpu_num > 0
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    # An '.xml' weights path means an OpenVINO IR model.
    is_ie_model = cfg.model.load_weights.endswith('.xml')
    if not is_ie_model:
        # Detect whether the checkpoint was produced with NNCF compression.
        compression_hyperparams = get_compression_hyperparams(
            cfg.model.load_weights)
        is_nncf_used = compression_hyperparams['enable_quantization'] \
            or compression_hyperparams['enable_pruning']
        if is_nncf_used:
            print(f'Using NNCF -- making NNCF changes in config')
            cfg = make_nncf_changes_in_config(
                cfg,
                compression_hyperparams['enable_quantization'],
                compression_hyperparams['enable_pruning'],
                args.opts)
    else:
        is_nncf_used = False
    set_random_seed(cfg.train.seed)
    log_name = 'test.log' + time.strftime('-%Y-%m-%d-%H-%M-%S')
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    datamanager = torchreid.data.ImageDataManager(
        filter_classes=args.classes, **imagedata_kwargs(cfg))
    num_classes = len(
        datamanager.test_loader[cfg.data.targets[0]]['query'].dataset.classes)
    # EMA is a training-time feature; disable it for pure evaluation.
    cfg.train.ema.enable = False
    if not is_ie_model:
        model = torchreid.models.build_model(**model_kwargs(cfg, num_classes))
        load_pretrained_weights(model, cfg.model.load_weights)
        if is_nncf_used:
            print('Begin making NNCF changes in model')
            model = make_nncf_changes_in_eval(model, cfg)
            print('End making NNCF changes in model')
        if cfg.use_gpu:
            num_devices = min(torch.cuda.device_count(), args.gpu_num)
            main_device_ids = list(range(num_devices))
            model = DataParallel(model, device_ids=main_device_ids,
                                 output_device=0).cuda(main_device_ids[0])
    else:
        from torchreid.utils.ie_tools import VectorCNN
        from openvino.inference_engine import IECore
        cfg.test.batch_size = 1
        model = VectorCNN(IECore(), cfg.model.load_weights, 'CPU',
                          switch_rb=True, **model_kwargs(cfg, num_classes))
        # The IE model embeds normalization, so drop the last two transforms
        # of the query pipeline (presumably ToTensor/Normalize -- TODO confirm).
        for _, dataloader in datamanager.test_loader.items():
            dataloader['query'].dataset.transform.transforms = \
                dataloader['query'].dataset.transform.transforms[:-2]
    if cfg.model.type == 'classification':
        check_classification_classes(
            model, datamanager, args.classes, test_only=True)
    engine = build_engine(cfg=cfg, datamanager=datamanager, model=model,
                          optimizer=None, scheduler=None)
    engine.test(0,
                dist_metric=cfg.test.dist_metric,
                normalize_feature=cfg.test.normalize_feature,
                visrank=cfg.test.visrank,
                visrank_topk=cfg.test.visrank_topk,
                save_dir=cfg.data.save_dir,
                use_metric_cuhk03=cfg.cuhk03.use_metric_cuhk03,
                ranks=(1, 5, 10, 20),
                rerank=cfg.test.rerank)
def main():
    """Entry point for attribute-recognition training/evaluation.

    Uses (optionally weighted) BCE-with-logits over attribute vectors,
    hand-rolls the train/test loop (no engine), and checkpoints on the best
    label-mA score.
    """
    global args
    set_random_seed(args.seed)
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    log_name = 'test.log' if args.evaluate else 'train.log'
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('** Arguments **')
    arg_keys = list(args.__dict__.keys())
    arg_keys.sort()
    for key in arg_keys:
        print('{}: {}'.format(key, args.__dict__[key]))
    print('\n')
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if use_gpu:
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')
    dataset_vars = init_dataset(use_gpu)
    trainloader, valloader, testloader, num_attrs, attr_dict = dataset_vars
    if args.weighted_bce:
        print('Use weighted binary cross entropy')
        print('Computing the weights ...')
        # Per-attribute positive frequency over the training set; rarer
        # attributes receive larger weights via exp(-freq).
        bce_weights = torch.zeros(num_attrs, dtype=torch.float)
        for _, attrs, _ in trainloader:
            bce_weights += attrs.sum(0)  # sum along the batch dim
        # NOTE(review): divisor assumes every batch is full-size; a smaller
        # final batch slightly skews the frequencies -- TODO confirm intent.
        bce_weights /= len(trainloader) * args.batch_size
        print('Sample ratio for each attribute: {}'.format(bce_weights))
        bce_weights = torch.exp(-1 * bce_weights)
        print('BCE weights: {}'.format(bce_weights))
        # Expand to (batch_size, num_attrs) as required by the loss weight.
        bce_weights = bce_weights.expand(args.batch_size, num_attrs)
        criterion = nn.BCEWithLogitsLoss(weight=bce_weights)
    else:
        print('Use plain binary cross entropy')
        criterion = nn.BCEWithLogitsLoss()
    print('Building model: {}'.format(args.arch))
    model = models.build_model(args.arch, num_attrs,
                               pretrained=not args.no_pretrained,
                               use_gpu=use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, args.height, args.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)
    if use_gpu:
        model = nn.DataParallel(model).cuda()
        criterion = criterion.cuda()
    if args.evaluate:
        test(model, testloader, attr_dict, use_gpu)
        return
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(args))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))
    start_epoch = args.start_epoch
    best_result = -np.inf
    if args.resume and check_isfile(args.resume):
        # NOTE(review): torch.load without map_location requires the same
        # device layout the checkpoint was saved on -- verify for CPU resume.
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        best_result = checkpoint['label_mA']
        print('Loaded checkpoint from "{}"'.format(args.resume))
        print('- start epoch: {}'.format(start_epoch))
        print('- label_mA: {}'.format(best_result))
    time_start = time.time()
    for epoch in range(start_epoch, args.max_epoch):
        train(epoch, model, criterion, optimizer, scheduler, trainloader, use_gpu)
        test_outputs = test(model, testloader, attr_dict, use_gpu)
        label_mA = test_outputs[0]
        is_best = label_mA > best_result
        if is_best:
            best_result = label_mA
        # A checkpoint is written every epoch; is_best additionally marks it
        # as the best snapshot.
        save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'epoch': epoch + 1,
                'label_mA': label_mA,
                'optimizer': optimizer.state_dict(),
            },
            args.save_dir,
            is_best=is_best)
    elapsed = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
def main():
    """Entry point for hyper-parameter search with Optuna (TPE sampler).

    Maximizes the objective over ``cfg.lr_finder.n_trials`` trials;
    ``finish_process`` is called both on normal completion and on Ctrl-C.
    """
    # parse arguments
    parser = build_base_argparser()
    parser.add_argument('-e', '--auxiliary-models-cfg', type=str, nargs='*', default='',
                        help='path to extra config files')
    parser.add_argument('--split-models', action='store_true',
                        help='whether to split models on own gpu')
    parser.add_argument('--aux-config-opts', nargs='+', default=None,
                        help='Modify aux config options using the command-line')
    parser.add_argument('--epochs', default=10, type=int,
                        help='amount of the epochs')
    args = parser.parse_args()
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available() and args.gpu_num > 0
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed, cfg.train.deterministic)
    log_name = 'optuna.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    # Mirror all print() output into the log file.
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    print('Show configuration\n{}\n'.format(cfg))
    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True
    # NOTE(review): seed=True is passed where an int seed is expected
    # (True behaves like 1) -- confirm this is intentional.
    sampler = TPESampler(n_startup_trials=5, seed=True)
    study = optuna.create_study(study_name='classification task',
                                direction="maximize", sampler=sampler)
    objective_partial = partial(objective, cfg, args)
    try:
        start_time = time.time()
        study.optimize(objective_partial, n_trials=cfg.lr_finder.n_trials,
                       timeout=None)
        elapsed = round(time.time() - start_time)
        print(
            f"--- optimization is finished: {datetime.timedelta(seconds=elapsed)} ---"
        )
    except KeyboardInterrupt:
        # Interrupted search still reports the best trials found so far.
        finish_process(study)
    else:
        finish_process(study)
def optimize(
    self,
    optimization_type: OptimizationType,
    dataset: DatasetEntity,
    output_model: ModelEntity,
    optimization_parameters: Optional[OptimizationParameters],
):
    """Optimize a model on a dataset via NNCF compression-aware training.

    Raises RuntimeError for unsupported optimization types, already-optimized
    models, or configs with EMA / LR-finder enabled (incompatible with NNCF).
    On success the optimized model and its metadata are stored in
    ``output_model``; returns early (saving nothing) if training was
    cancelled via the stop callback.
    """
    if optimization_type is not OptimizationType.NNCF:
        raise RuntimeError('NNCF is the only supported optimization')
    if self._compression_ctrl:
        raise RuntimeError('The model is already optimized. NNCF requires the original model for optimization.')
    if self._cfg.train.ema.enable:
        raise RuntimeError('EMA model could not be used together with NNCF compression')
    if self._cfg.lr_finder.enable:
        raise RuntimeError('LR finder could not be used together with NNCF compression')
    aux_pretrained_dicts = self._load_aux_models_data(self._task_environment.model)
    num_aux_models = len(self._cfg.mutual_learning.aux_configs)
    num_aux_pretrained_dicts = len(aux_pretrained_dicts)
    # Every configured aux model must come with pretrained weights.
    if num_aux_models != num_aux_pretrained_dicts:
        raise RuntimeError('The pretrained weights are not provided for all aux models.')
    if optimization_parameters is not None:
        update_progress_callback = optimization_parameters.update_progress
    else:
        update_progress_callback = default_progress_callback
    time_monitor = TrainingProgressCallback(
        update_progress_callback,
        num_epoch=self._cfg.train.max_epoch,
        num_train_steps=math.ceil(len(dataset.get_subset(Subset.TRAINING)) /
                                  self._cfg.train.batch_size),
        num_val_steps=0, num_test_steps=0)
    self.metrics_monitor = DefaultMetricsMonitor()
    self.stop_callback.reset()
    set_random_seed(self._cfg.train.seed)
    train_subset = dataset.get_subset(Subset.TRAINING)
    val_subset = dataset.get_subset(Subset.VALIDATION)
    # Wrap OTE subsets as custom dataset roots for ImageDataManager.
    self._cfg.custom_datasets.roots = [
        OTEClassificationDataset(train_subset, self._labels, self._multilabel,
                                 keep_empty_label=self._empty_label in self._labels),
        OTEClassificationDataset(val_subset, self._labels, self._multilabel,
                                 keep_empty_label=self._empty_label in self._labels)]
    datamanager = torchreid.data.ImageDataManager(**imagedata_kwargs(self._cfg))
    # Wrap the task's model with NNCF (uses the datamanager for init data).
    self._compression_ctrl, self._model, self._nncf_metainfo = \
        wrap_nncf_model(self._model, self._cfg, datamanager_for_init=datamanager)
    # NNCF fine-tuning uses a reduced LR derived from the initial one.
    self._cfg.train.lr = calculate_lr_for_nncf_training(self._cfg, self._initial_lr, False)
    train_model = self._model
    if self._cfg.use_gpu:
        main_device_ids = list(range(self.num_devices))
        extra_device_ids = [main_device_ids for _ in range(num_aux_models)]
        train_model = DataParallel(train_model, device_ids=main_device_ids,
                                   output_device=0).cuda(main_device_ids[0])
    else:
        extra_device_ids = [None for _ in range(num_aux_models)]
    optimizer = torchreid.optim.build_optimizer(train_model, **optimizer_kwargs(self._cfg))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer,
                                                   num_iter=datamanager.num_iter,
                                                   **lr_scheduler_kwargs(self._cfg))
    logger.info('Start training')
    run_training(self._cfg, datamanager, train_model, optimizer,
                 scheduler, extra_device_ids, self._cfg.train.lr,
                 should_freeze_aux_models=True,
                 aux_pretrained_dicts=aux_pretrained_dicts,
                 tb_writer=self.metrics_monitor,
                 perf_monitor=time_monitor,
                 stop_callback=self.stop_callback,
                 nncf_metainfo=self._nncf_metainfo,
                 compression_ctrl=self._compression_ctrl)
    self.metrics_monitor.close()
    if self.stop_callback.check_stop():
        logger.info('Training cancelled.')
        return
    logger.info('Training completed')
    self.save_model(output_model)
    # Record optimization provenance on the output model entity.
    output_model.model_format = ModelFormat.BASE_FRAMEWORK
    output_model.optimization_type = self._optimization_type
    output_model.optimization_methods = self._optimization_methods
    output_model.precision = self._precision
def main():
    """Train or evaluate a re-id model driven by a YAML configuration file."""
    # Single CLI option: the path to the YAML configuration.
    cli = argparse.ArgumentParser()
    cli.add_argument('-c', '--config', required=True,
                     help='path to configuration file')
    cli_args = cli.parse_args()

    # NOTE(review): FullLoader assumes the config file is trusted input.
    with open(cli_args.config, "r") as ymlfile:
        config = yaml.load(ymlfile, Loader=yaml.FullLoader)

    # Derive a per-experiment sub-folder of save_dir from the config file's
    # basename, e.g. save_dir "logs" + config "exp01.yaml" -> "logs/exp01",
    # so each config file does not need to spell out its own save_dir.
    exp_name = pathlib.Path(cli_args.config).stem
    config["save_dir"] = os.path.join(config["save_dir"], exp_name)

    # Reproducibility.
    set_random_seed(config["seed"])

    # GPU selection: optionally pin visible devices, then decide CPU vs GPU.
    if not config["use_avai_gpus"]:
        os.environ['CUDA_VISIBLE_DEVICES'] = config["gpu_devices"]
    gpu_enabled = torch.cuda.is_available() and not config["use_cpu"]

    # Redirect stdout to a timestamped log file under the save directory.
    if config["evaluate"]:
        log_filename = 'test.log'
    else:
        log_filename = 'train.log'
    log_filename += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(config["save_dir"], log_filename))

    print('==========\nArgs:{}\n=========='.format(config))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if gpu_enabled:
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    # Data pipeline and model construction.
    datamanager = build_datamanager(config)

    print('Building model: {}'.format(config["arch"]))
    model = torchreid.models.build_model(
        name=config["arch"],
        num_classes=datamanager.num_train_pids,
        loss=config["loss"].lower(),
        pretrained=(not config["no_pretrained"]),
        use_gpu=gpu_enabled)

    num_params, flops = compute_model_complexity(
        model, (1, 3, config["height"], config["width"]))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    # Optionally warm-start from pretrained weights.
    if config["load_weights"] and check_isfile(config["load_weights"]):
        load_pretrained_weights(model, config["load_weights"])

    if gpu_enabled:
        model = nn.DataParallel(model).cuda()

    # Optimizer and LR schedule, with optional checkpoint resume.
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(config))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer, **lr_scheduler_kwargs(config))
    if config["resume"] and check_isfile(config["resume"]):
        config["start_epoch"] = resume_from_checkpoint(
            config["resume"], model, optimizer=optimizer)

    print('Building {}-engine for {}-reid'.format(config["loss"], config["app"]))
    engine = build_engine(config, datamanager, model, optimizer, scheduler)
    engine.run(**engine_run_kwargs(config))
def main():
    """Build the model described by a config file and report its complexity.

    Parses CLI arguments, composes the effective configuration (defaults <-
    config file <- command-line overrides), constructs the data manager and
    the main model, prints parameter/FLOP counts, and optionally dumps the
    metrics to a JSON file given by ``--out``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config-file', type=str, default='', required=True,
                        help='path to config file')
    parser.add_argument('--custom-roots', type=str, nargs='+',
                        help='types or paths to annotation of custom datasets (delimited by space)')
    parser.add_argument('--custom-types', type=str, nargs='+',
                        help='path of custom datasets (delimited by space)')
    parser.add_argument('--custom-names', type=str, nargs='+',
                        help='names of custom datasets (delimited by space)')
    parser.add_argument('--root', type=str, default='',
                        help='path to data root')
    parser.add_argument('--classes', type=str, nargs='+',
                        help='name of classes in classification dataset')
    parser.add_argument('--out')
    parser.add_argument('opts', default=None, nargs=argparse.REMAINDER,
                        help='Modify config options using the command-line')
    args = parser.parse_args()

    # Compose the effective config: defaults <- config file <- CLI overrides.
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)

    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    datamanager = build_datamanager(cfg, args.classes)
    num_train_classes = datamanager.num_train_pids

    print('Building main model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(
        **model_kwargs(cfg, num_train_classes))

    # get_model_complexity_info reports MACs; FLOPs ~= 2 * MACs.
    macs, num_params = get_model_complexity_info(
        model, (3, cfg.data.height, cfg.data.width),
        as_strings=False, verbose=False, print_per_layer_stat=False)
    print('Main model complexity: M params={:,} G flops={:,}'.format(
        num_params / 10**6, macs * 2 / 10**9))

    # Optionally dump size/complexity metrics as JSON for downstream tooling.
    if args.out:
        out = [
            {
                'key': 'size',
                'display_name': 'Size',
                'value': num_params / 10**6,
                'unit': 'Mp'
            },
            {
                'key': 'complexity',
                'display_name': 'Complexity',
                'value': 2 * macs / 10**9,
                'unit': 'GFLOPs'
            },
        ]
        # Fixed: original printed 'dump to' + path with no separating space.
        print('dump to ' + args.out)
        with open(args.out, 'w') as write_file:
            json.dump(out, write_file, indent=4)