def main(): print("evaluate start") # set default gpu device id # torch.cuda.set_device(config.gpus[0]) # set seed np.random.seed(config.seed) torch.manual_seed(config.seed) torch.cuda.manual_seed_all(config.seed) if config.deterministic: torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.backends.cudnn.enabled = True else: torch.backends.cudnn.benchmark = True # get data with meta info if config.data_loader_type == 'torch': input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data( config.dataset, config.data_path, config.cutout_length, auto_augmentation=config.auto_augmentation) # train_loader = torch.utils.data.DataLoader(train_data, # batch_size=config.batch_size, # shuffle=True, # num_workers=config.workers, # pin_memory=True) valid_loader = torch.utils.data.DataLoader( valid_data, batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=False) elif config.data_loader_type == 'dali': input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data_dali( config.dataset, config.data_path, batch_size=config.batch_size, num_threads=config.workers) # train_loader = train_data valid_loader = valid_data else: raise NotImplementedError use_aux = config.aux_weight > 0. if config.model_method == 'darts_NAS': if config.genotype is None: config.genotype = get_model.get_model(config.model_method, config.model_name) if 'imagenet' in config.dataset.lower(): model = AugmentCNN_ImageNet(input_size, input_channels, config.init_channels, n_classes, config.layers, use_aux, config.genotype) else: model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes, config.layers, use_aux, config.genotype) elif config.model_method == 'my_model_collection': from models.my_searched_model import my_specialized if config.structure_path is None: _ = config.model_name.split(':') net_config_path = os.path.join(project_path, 'models', 'my_model_collection', _[0], _[1] + '.json') else: net_config_path = config.structure_path # model = my_specialized(num_classes=n_classes, net_config=net_config_path, # dropout_rate=config.dropout_rate) model = my_specialized(num_classes=n_classes, net_config=net_config_path, dropout_rate=0) else: model_fun = get_model.get_model(config.model_method, config.model_name) # model = model_fun(num_classes=n_classes, dropout_rate=config.dropout_rate) model = model_fun(num_classes=n_classes, dropout_rate=0) # load model ckpt = torch.load(config.pretrained) print(ckpt.keys()) # for k in model: # print(k) # return # set bn # model.set_bn_param(config.bn_momentum, config.bn_eps) for _key in list(ckpt['state_dict_ema'].keys()): if 'total_ops' in _key or 'total_params' in _key: del ckpt['state_dict_ema'][_key] model.load_state_dict(ckpt['state_dict_ema']) # model init # model.init_model(model_init=config.model_init) model.cuda() # model size total_ops, total_params = flops_counter.profile( model, [1, input_channels, input_size, input_size]) print("Model size = {:.3f} MB".format(total_params)) print("Model FLOPS with input {} = {:.3f} M".format( str([1, input_channels, input_size, input_size]), total_ops)) total_ops, total_params = flops_counter.profile(model, [1, 3, 224, 224]) print("Model FLOPS with input [1,3,224,224] {:.3f} M".format(total_ops)) model = nn.DataParallel(model).to(device) # CRITERION if config.label_smoothing > 0: from utils import LabelSmoothLoss criterion = LabelSmoothLoss( smoothing=config.label_smoothing).to(device) else: criterion = 
nn.CrossEntropyLoss().to(device) best_top1 = validate(valid_loader, model, criterion, 0, 0) print("Final best Prec@1 = {:.4%}".format(best_top1))
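

# The evaluation entry point above calls a validate() helper that is not shown in this
# excerpt. The function below is only a sketch of what such a helper is assumed to do
# (average loss and top-1 accuracy over valid_loader); the name validate_sketch is
# hypothetical, torch is assumed to be imported at module level like the rest of this
# file, and the repo's real validate() (which also takes epoch/cur_step for logging)
# may differ in detail.
@torch.no_grad()
def validate_sketch(valid_loader, model, criterion, epoch, cur_step):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    for images, targets in valid_loader:
        images = images.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)
        logits = model(images)
        # some models return (logits, aux_logits); keep only the main head
        if isinstance(logits, tuple):
            logits = logits[0]
        loss = criterion(logits, targets)
        total_loss += loss.item() * targets.size(0)
        correct += (logits.argmax(dim=1) == targets).sum().item()
        total += targets.size(0)
    top1 = correct / max(total, 1)
    print("Valid: epoch {} step {} loss {:.4f} Prec@1 {:.4%}".format(
        epoch, cur_step, total_loss / max(total, 1), top1))
    return top1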
def main(): logger.info("Logger is set - training start") # set default gpu device id # torch.cuda.set_device(config.gpus[0]) # set seed np.random.seed(config.seed) torch.manual_seed(config.seed) torch.cuda.manual_seed_all(config.seed) if config.deterministic: torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.backends.cudnn.enabled = True else: torch.backends.cudnn.benchmark = True # get data with meta info if config.data_loader_type == 'torch': input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data( config.dataset, config.data_path, config.cutout_length, auto_augmentation=config.auto_augmentation) train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size, shuffle=True, num_workers=config.workers, pin_memory=True) valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=True) elif config.data_loader_type == 'dali': input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data_dali( config.dataset, config.data_path, batch_size=config.batch_size, num_threads=config.workers) train_loader = train_data valid_loader = valid_data else: raise NotImplementedError if config.label_smoothing > 0: from utils import LabelSmoothLoss criterion = LabelSmoothLoss(smoothing=config.label_smoothing).to(device) else: criterion = nn.CrossEntropyLoss().to(device) use_aux = config.aux_weight > 0. if config.model_method == 'darts_NAS': if config.genotype is None: config.genotype = get_model.get_model(config.model_method, config.model_name) if 'imagenet' in config.dataset.lower(): model = AugmentCNN_ImageNet(input_size, input_channels, config.init_channels, n_classes, config.layers, use_aux, config.genotype) else: model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes, config.layers, use_aux, config.genotype) elif config.model_method == 'my_model_collection': from models.my_searched_model import my_specialized if config.structure_path is None: _ = config.model_name.split(':') net_config_path = os.path.join(project_path, 'models', 'my_model_collection', _[0], _[1] + '.json') else: net_config_path = config.structure_path model = my_specialized(num_classes=n_classes, net_config=net_config_path, dropout_rate=config.dropout_rate) else: model_fun = get_model.get_model(config.model_method, config.model_name) model = model_fun(num_classes=n_classes, dropout_rate=config.dropout_rate) # set bn model.set_bn_param(config.bn_momentum, config.bn_eps) # model init model.init_model(model_init=config.model_init) model.cuda() # model size total_ops, total_params = flops_counter.profile(model, [1, input_channels, input_size, input_size]) logger.info("Model size = {:.3f} MB".format(total_params)) logger.info("Model FLOPS with input {} = {:.3f} M".format(str([1, input_channels, input_size, input_size]), total_ops)) total_ops, total_params = flops_counter.profile(model, [1, 3, 224, 224]) logger.info("Model FLOPS with input [1,3,224,224] {:.3f} M".format(total_ops)) model = nn.DataParallel(model).to(device) # weights optimizer if not config.no_decay_keys == 'None': keys = config.no_decay_keys.split('#') optimizer = torch.optim.SGD([ {'params': model.module.get_parameters(keys, mode='exclude'), 'weight_decay': config.weight_decay}, {'params': model.module.get_parameters(keys, mode='include'), 'weight_decay': 0}, ], lr=config.lr, momentum=config.momentum) else: optimizer = torch.optim.SGD(model.parameters(), config.lr, 
momentum=config.momentum, weight_decay=config.weight_decay) lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs) best_top1 = 0. # training loop _size = get_iterator_length(train_loader) for epoch in range(config.epochs): lr_scheduler.step() if config.drop_path_prob > 0: drop_prob = config.drop_path_prob * epoch / config.epochs model.module.drop_path_prob(drop_prob) # training train(train_loader, model, optimizer, criterion, epoch) # validation cur_step = (epoch+1) * _size top1 = validate(valid_loader, model, criterion, epoch, cur_step) # save if best_top1 < top1: best_top1 = top1 is_best = True logger.info("Current best Prec@1 = {:.4%}".format(best_top1)) else: is_best = False utils.save_checkpoint(model, config.path, is_best) print("") logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
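

# The epoch loop above calls a train() helper that this excerpt does not define. The
# function below is a minimal sketch of one training epoch under the assumption that
# DARTS-style models return (logits, aux_logits) and the auxiliary head is weighted by
# config.aux_weight; the name train_sketch and the config.grad_clip attribute are
# assumptions, and the repo's real train() also does metering/logging.
def train_sketch(train_loader, model, optimizer, criterion, epoch):
    model.train()
    for step, (images, targets) in enumerate(train_loader):
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        if isinstance(outputs, tuple):
            # (logits, aux_logits): add the weighted auxiliary loss when enabled
            logits, aux_logits = outputs
            loss = criterion(logits, targets)
            if config.aux_weight > 0. and aux_logits is not None:
                loss = loss + config.aux_weight * criterion(aux_logits, targets)
        else:
            loss = criterion(outputs, targets)
        loss.backward()
        # gradient clipping is common in DARTS-style augment training (assumed attribute)
        if getattr(config, 'grad_clip', 0):
            nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
        optimizer.step()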
def main():
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    if args.distributed and args.num_gpu > 1:
        logger.warning(
            'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
        args.num_gpu = 1

    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.distributed:
        logger.info('Training in distributed mode with multiple processes, 1 GPU per process. '
                    'Process %d, total %d.' % (args.rank, args.world_size))
    else:
        logger.info('Training with a single process on %d GPUs.' % args.num_gpu)

    torch.manual_seed(args.seed + args.rank)

    # my model
    use_aux = args.aux_weight > 0.
    if args.model_method == 'darts_NAS':
        if args.genotype is None:
            args.genotype = get_model.get_model(args.model_method, args.model_name)
        model = AugmentCNN_ImageNet(224, 3, args.init_channels, args.num_classes,
                                    args.layers, use_aux, args.genotype)
    elif args.model_method == 'my_model_collection':
        from models.my_searched_model import my_specialized
        if args.structure_path is None:
            _ = args.model_name.split(':')
            net_config_path = os.path.join(project_path, 'models', 'my_model_collection',
                                           _[0], _[1] + '.json')
        else:
            net_config_path = args.structure_path
        model = my_specialized(num_classes=args.num_classes, net_config=net_config_path,
                               dropout_rate=args.drop)
    else:
        model_fun = get_model.get_model(args.model_method, args.model_name)
        model = model_fun(num_classes=args.num_classes, dropout_rate=args.drop)

    # set bn
    model.set_bn_param(args.bn_momentum, args.bn_eps)

    # model init
    model.init_model(model_init=args.model_init)

    total_ops, total_params = flops_counter.profile(model, [1, 3, 224, 224])
    logger.info("Model size = {:.3f} MB".format(total_params))
    logger.info("Model FLOPS with input [1,3,224,224] = {:.3f} M".format(total_ops))
    # pdb.set_trace()

    # model = create_model(
    #     args.model,
    #     pretrained=args.pretrained,
    #     num_classes=args.num_classes,
    #     drop_rate=args.drop,
    #     drop_connect_rate=args.drop_connect,
    #     global_pool=args.gp,
    #     bn_tf=args.bn_tf,
    #     bn_momentum=args.bn_momentum,
    #     bn_eps=args.bn_eps,
    #     checkpoint_path=args.initial_checkpoint)

    if args.local_rank == 0:
        logger.info('Model %s created, param count: %d' %
                    (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)

    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits

    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        model = convert_splitbn_model(model, max(num_aug_splits, 2))

    if args.num_gpu > 1:
        if args.amp:
            logger.warning(
                'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.')
            args.amp = False
        model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    optimizer = create_optimizer(args, model)

    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logger.info('NVIDIA APEX {}. AMP {}.'.format(
            'installed' if has_apex else 'not installed',
            'on' if use_amp else 'off'))

    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(model, args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logger.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logger.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state

    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model,
                             decay=args.model_ema_decay,
                             device='cpu' if args.model_ema_force_cpu else '',
                             resume=args.resume)

    if args.distributed:
        if args.sync_bn:
            assert not args.split_bn
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
                if args.local_rank == 0:
                    logger.info(
                        'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                        'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')
            except Exception as e:
                logger.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logger.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
            model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        logger.info('Scheduled epochs: {}'.format(num_epochs))

    train_dir = os.path.join(args.data, 'train')
    if not os.path.exists(train_dir):
        logger.error('Training folder does not exist at: {}'.format(train_dir))
        exit(1)
    dataset_train = Dataset(train_dir)

    collate_fn = None
    if args.prefetcher and args.mixup > 0:
        assert not num_aug_splits  # collate conflict (need to support deinterleaving in collate mixup)
        collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    if num_aug_splits > 1:
        dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits)

    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        re_split=args.resplit,
        color_jitter=args.color_jitter,
        auto_augment=args.aa,
        num_aug_splits=num_aug_splits,
        interpolation=args.train_interpolation,
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        collate_fn=collate_fn,
        pin_memory=args.pin_mem,
    )

    eval_dir = os.path.join(args.data, 'val')
    if not os.path.isdir(eval_dir):
        eval_dir = os.path.join(args.data, 'validation')
        if not os.path.isdir(eval_dir):
            logger.error('Validation folder does not exist at: {}'.format(eval_dir))
            exit(1)
    dataset_eval = Dataset(eval_dir)

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        crop_pct=data_config['crop_pct'],
        pin_memory=args.pin_mem,
    )

    if args.jsd:
        assert num_aug_splits > 1  # JSD only valid with aug splits set
        train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    elif args.mixup > 0.:
        # smoothing is handled with mixup label transform
        train_loss_fn = SoftTargetCrossEntropy().cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    elif args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
        validate_loss_fn = train_loss_fn

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''  # kept live so the calls below still get a valid path while CheckpointSaver is disabled
    # if args.local_rank == 0:
    #     output_base = args.output if args.output else './output'
    #     exp_name = '-'.join([
    #         datetime.now().strftime("%Y%m%d-%H%M%S"),
    #         args.model_method,
    #         args.model_name,
    #         str(data_config['input_size'][-1])
    #     ])
    #     output_dir = get_outdir(output_base, 'train', exp_name)
    #     decreasing = True if eval_metric == 'loss' else False
    #     saver = CheckpointSaver(checkpoint_dir=output_dir, decreasing=decreasing)
    #     with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
    #         f.write(args_text)

    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_epoch(epoch, model, loader_train, optimizer, train_loss_fn, args,
                                        lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir,
                                        use_amp=use_amp, model_ema=model_ema)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    logger.info("Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            eval_metrics = validate(model, loader_eval, validate_loss_fn, args)

            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                    distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')
                ema_eval_metrics = validate(model_ema.ema, loader_eval, validate_loss_fn, args,
                                            log_suffix=' (EMA)')
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            update_summary(epoch, train_metrics, eval_metrics,
                           os.path.join(output_dir, 'summary.csv'),
                           write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    model, optimizer, args, epoch=epoch, model_ema=model_ema,
                    metric=save_metric, use_amp=use_amp)

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
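

# The evaluation script at the top of this file loads the 'state_dict_ema' weights that
# the ModelEma wrapper maintains during training here. For reference, the function below
# sketches the exponential moving average update such a wrapper is assumed to apply after
# each optimizer step; the name ema_update_sketch is hypothetical, torch is assumed to be
# imported at module level, and timm's actual ModelEma additionally handles device
# placement, DataParallel/DDP prefixes, and resume.
def ema_update_sketch(ema_model, model, decay=0.9998):
    with torch.no_grad():
        ema_state = ema_model.state_dict()
        for name, value in model.state_dict().items():
            ema_value = ema_state[name]
            if ema_value.is_floating_point():
                # ema <- decay * ema + (1 - decay) * current
                ema_value.mul_(decay).add_(value, alpha=1.0 - decay)
            else:
                # non-float entries (e.g. BatchNorm num_batches_tracked) are copied as-is
                ema_value.copy_(value)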