def _run_training(self):
    if self.params.lb_smooth == 0:
        self.criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        if self.local_rank == 0:
            logging.info("Using CrossEntropyLoss with label smooth {}.".format(
                self.params.lb_smooth))
        self.criterion = utils.CrossEntropyLabelSmooth(
            self.reader.n_classes, self.params.lb_smooth)

    # if start_new_model is True, global_step = 0;
    # otherwise we get the global step from the checkpoint
    if self.start_new_model:
        start_epoch = 0
        global_step = 0
    else:
        start_epoch = self.checkpoint['epoch']
        global_step = self.checkpoint['global_step']

    data_loader, sampler = self.reader.load_dataset()
    if sampler is not None:
        assert sampler.num_replicas == self.world_size

    batch_size = self.batch_size
    if self.is_distributed:
        n_files = sampler.num_samples
    else:
        n_files = self.reader.n_train_files

    if self.local_rank == 0:
        logging.info("Number of files on worker: {}".format(n_files))
        logging.info("Start training")

    profile_enabled = False
    for epoch_id in range(start_epoch, self.params.num_epochs):
        if self.is_distributed:
            sampler.set_epoch(epoch_id)
        for n_batch, data in enumerate(data_loader):
            epoch = (int(global_step) * batch_size) / n_files
            with torch.autograd.profiler.profile(enabled=profile_enabled,
                                                 use_cuda=True) as prof:
                self._training(data, epoch, global_step)
            if profile_enabled:
                logging.info(prof.key_averages().table(
                    sort_by="self_cpu_time_total"))
                # prof.export_chrome_trace(join(
                #     self.train_dir + '_logs', 'trace_{}.json'.format(global_step)))
            self._save_ckpt(global_step, epoch_id)
            global_step += 1
        self.scheduler.step()
    self._save_ckpt(global_step, epoch_id, final=True)
    logging.info("Done training -- epoch limit reached.")
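# Every snippet in this section relies on utils.CrossEntropyLabelSmooth, whose
# definition is not shown here. Below is a minimal, self-contained sketch of
# such a criterion, assuming the common formulation (uniform smoothing over all
# classes); the actual utils implementation may differ in details.
import torch
import torch.nn as nn


class CrossEntropyLabelSmooth(nn.Module):
    """Cross-entropy with uniformly smoothed targets: weight (1 - epsilon) on the
    true class plus epsilon / num_classes spread over all classes."""

    def __init__(self, num_classes, epsilon):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)  # (batch, num_classes)
        # one-hot encode the targets, then mix with the uniform distribution
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        loss = (-targets * log_probs).mean(0).sum()
        return loss


if __name__ == '__main__':
    criterion = CrossEntropyLabelSmooth(num_classes=10, epsilon=0.1)
    logits = torch.randn(4, 10)
    labels = torch.randint(0, 10, (4,))
    print(criterion(logits, labels))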
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)
    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=config.cutout_length,
                     validation=True,
                     auto_aug=config.auto_aug)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.num_workers,
                                               pin_memory=True)

    logger.info("loading model...")
    if config.load_model_dir is not None:
        model = torch.load(config.load_model_dir)
    else:
        model = utils.load_checkpoint(config.model_dir)
    model = model.to(device)
    model_size = utils.param_size(model)
    logger.info("model_size: {:.3f} MB".format(model_size))

    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(num_classes, config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    logger.info("start testing...")
    best_top1 = test(valid_loader, model, criterion)
    logger.info("Final Prec@1: {:.4%}".format(best_top1))
    logger.info("*** Finish {} ***".format(config.stage))
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()

    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes, config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join(config.output_path, config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)

    param_size = utils.param_size(
        model, criterion,
        [3, 32, 32] if 'cifar' in config.dataset else [3, 224, 224])
    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyperparameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif param_size < 3.5:  # the original "3.0 < param_size < 3.5" left exactly 3.0 unhandled
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs,
                                                              eta_min=1E-6)

    best_top1 = best_top5 = 0.
    for epoch in range(config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)

        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch, main_proc)

        # validation
        top1, top5 = validate(logger, config, valid_loader, model, criterion, epoch, main_proc)
        best_top1 = max(best_top1, top1)
        best_top5 = max(best_top5, top5)
        lr_scheduler.step()

    logger.info("Final best Prec@1 = %.4f Prec@5 = %.4f", best_top1, best_top5)
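# Several snippets here ramp a drop-path probability linearly over training
# (drop_prob = drop_path_prob * epoch / epochs) and push it into the network via
# a repo-specific drop_path_prob() method. As that method is not shown, here is
# a minimal sketch of the underlying drop-path operation as used in DARTS-style
# networks; this is an assumption for illustration, not the repo's code.
import torch
import torch.nn as nn


class DropPath(nn.Module):
    """Zero an entire residual branch per sample with probability p."""

    def __init__(self, p=0.):
        super(DropPath, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training and self.p > 0.:
            keep = 1. - self.p
            # one Bernoulli draw per sample, broadcast over the remaining dims
            mask = x.new_empty(x.size(0), *([1] * (x.dim() - 1))).bernoulli_(keep)
            x = x / keep * mask  # rescale so the expected activation is unchanged
        return x


if __name__ == '__main__':
    m = DropPath(p=0.3)
    m.train()
    print(m(torch.ones(4, 2, 1, 1)).squeeze())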
def build_imagenet(model_state_dict, optimizer_state_dict, **kwargs):
    valid_ratio = kwargs.pop('valid_ratio', None)
    valid_num = kwargs.pop('valid_num', None)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.ToTensor(),
        normalize,
    ])
    valid_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    if args.zip_file:
        logging.info('Loading data from zip file')
        traindir = os.path.join(args.data, 'train.zip')
        if args.lazy_load:
            data = utils.ZipDataset(traindir)
        else:
            logging.info('Loading data into memory')
            data = utils.InMemoryZipDataset(traindir, num_workers=args.num_workers)
    else:
        logging.info('Loading data from directory')
        traindir = os.path.join(args.data, 'train')
        if args.lazy_load:
            data = dset.ImageFolder(traindir)
        else:
            logging.info('Loading data into memory')
            data = utils.InMemoryDataset(traindir, num_workers=args.num_workers)

    num_data = len(data)
    indices = list(range(num_data))
    np.random.shuffle(indices)
    if valid_ratio is not None:
        # parenthesize (1 - valid_ratio); the original "1 - valid_ratio * num_data"
        # multiplied before subtracting, yielding a negative split index
        split = int(np.floor((1 - valid_ratio) * num_data))
        train_indices = sorted(indices[:split])
        valid_indices = sorted(indices[split:])
    else:
        assert valid_num is not None
        train_indices = sorted(indices[valid_num:])
        valid_indices = sorted(indices[:valid_num])

    train_data = utils.WrappedDataset(data, train_indices, train_transform)
    valid_data = utils.WrappedDataset(data, valid_indices, valid_transform)
    logging.info('train set = %d', len(train_data))
    logging.info('valid set = %d', len(valid_data))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices),
        pin_memory=True, num_workers=args.num_workers, drop_last=False)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.eval_batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_indices),
        pin_memory=True, num_workers=args.num_workers, drop_last=False)

    model = NASNet(args.width_stages, args.n_cell_stages, args.stride_stages, args.dropout)
    model.init_model(args.model_init)
    model.set_bn_param(0.1, 0.001)
    logging.info("param size = %d", utils.count_parameters(model))
    if model_state_dict is not None:
        model.load_state_dict(model_state_dict)

    if args.no_decay_keys:
        keys = args.no_decay_keys.split('#')
        net_params = [model.get_parameters(keys, mode='exclude'),
                      model.get_parameters(keys, mode='include')]
        optimizer = torch.optim.SGD(
            [{'params': net_params[0], 'weight_decay': args.weight_decay},
             {'params': net_params[1], 'weight_decay': 0}],
            args.lr, momentum=0.9, nesterov=True)
    else:
        optimizer = torch.optim.SGD(
            model.parameters(), args.lr, momentum=0.9,
            weight_decay=args.weight_decay, nesterov=True)
    if optimizer_state_dict is not None:
        optimizer.load_state_dict(optimizer_state_dict)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)

    if torch.cuda.device_count() > 1:
        logging.info("Use %d %s", torch.cuda.device_count(), "GPUs !")
        model = nn.DataParallel(model)
    model = model.cuda()
    train_criterion = utils.CrossEntropyLabelSmooth(1000, args.label_smooth).cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()
    return train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler
def build_imagenet(model_config, model_state_dict, optimizer_state_dict, **kwargs):
    epoch = kwargs.pop('epoch')
    step = kwargs.pop('step')

    # build model
    logging.info('Building Model')
    model = NASNet.build_from_config(model_config)
    model.init_model(args.model_init)
    model.set_bn_param(model_config['bn']['momentum'], model_config['bn']['eps'])
    print(model.config)
    logging.info("param size = %d", utils.count_parameters(model))
    logging.info("multi adds = %fM",
                 model.get_flops(torch.ones(1, 3, 224, 224).float())[0] / 1000000)
    if model_state_dict is not None:
        model.load_state_dict(model_state_dict)

    if torch.cuda.device_count() > 1:
        logging.info("Use %d %s", torch.cuda.device_count(), "GPUs !")
        model = nn.DataParallel(model)
    model = model.cuda()

    # build criterion
    logging.info('Building Criterion')
    train_criterion = utils.CrossEntropyLabelSmooth(1000, args.label_smooth).cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()

    # build optimizer
    logging.info('Building Optimizer')
    if args.no_decay_keys:
        keys = args.no_decay_keys.split('#')
        net_params = [model.module.get_parameters(keys, mode='exclude'),
                      model.module.get_parameters(keys, mode='include')]
        optimizer = torch.optim.SGD(
            [{'params': net_params[0], 'weight_decay': args.weight_decay},
             {'params': net_params[1], 'weight_decay': 0}],
            args.lr, momentum=0.9, nesterov=True)
    else:
        optimizer = torch.optim.SGD(
            model.parameters(), args.lr, momentum=0.9,
            weight_decay=args.weight_decay, nesterov=True)
    if optimizer_state_dict is not None:
        optimizer.load_state_dict(optimizer_state_dict)

    # build data loader
    logging.info('Building Data')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.ToTensor(),
        normalize,
    ])
    valid_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    if args.zip_file:
        logging.info('Loading data from zip file')
        traindir = os.path.join(args.data, 'train.zip')
        validdir = os.path.join(args.data, 'valid.zip')
        if args.lazy_load:
            train_data = utils.ZipDataset(traindir, train_transform)
            valid_data = utils.ZipDataset(validdir, valid_transform)
        else:
            logging.info('Loading data into memory')
            train_data = utils.InMemoryZipDataset(traindir, train_transform,
                                                  num_workers=args.num_workers)
            valid_data = utils.InMemoryZipDataset(validdir, valid_transform,
                                                  num_workers=args.num_workers)
    else:
        logging.info('Loading data from directory')
        traindir = os.path.join(args.data, 'train')
        validdir = os.path.join(args.data, 'val')
        if args.lazy_load:
            train_data = dset.ImageFolder(traindir, train_transform)
            valid_data = dset.ImageFolder(validdir, valid_transform)
        else:
            logging.info('Loading data into memory')
            train_data = utils.InMemoryDataset(traindir, train_transform,
                                               num_workers=args.num_workers)
            valid_data = utils.InMemoryDataset(validdir, valid_transform,
                                               num_workers=args.num_workers)
    logging.info('Found %d in training data', len(train_data))
    logging.info('Found %d in validation data', len(valid_data))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=args.num_workers)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.eval_batch_size, shuffle=False,
        pin_memory=True, num_workers=args.num_workers)

    # build lr scheduler
    logging.info('Building LR Scheduler')
    if args.lr_scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs) * len(train_queue), eta_min=0, last_epoch=step)
    else:
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, args.decay_period, gamma=args.gamma, last_epoch=epoch)
    return train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler
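# Why the cosine branch above uses T_max = epochs * len(train_queue) and
# last_epoch = step: the schedule is presumably advanced once per training
# iteration rather than once per epoch, and `step` lets a resumed job continue
# mid-schedule. A minimal, self-contained illustration with toy sizes (this is
# an assumption about the training loop, which is not shown in this section):
import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
epochs, iters_per_epoch = 2, 5
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=epochs * iters_per_epoch, eta_min=0)
for epoch in range(epochs):
    for it in range(iters_per_epoch):
        optimizer.step()   # the weight update would happen here
        scheduler.step()   # one scheduler step per iteration, matching T_max
    print('epoch', epoch, 'lr', scheduler.get_last_lr()[0])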
def main(args=None):
    if args is None:
        args = get_parameter()

    if args.dataset == 'dali' and not dali_enable:
        args.case = args.case.replace('dali', 'imagenet')
        args.dataset = 'imagenet'
        args.workers = 12

    # log_dir
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    model_arch = args.model
    model_name = model_arch
    if args.evaluate:
        log_suffix = 'eval-' + model_arch + '-' + args.case
    else:
        log_suffix = model_arch + '-' + args.case
    utils.setup_logging(os.path.join(args.log_dir, log_suffix + '.txt'),
                        resume=args.resume)

    logging.info("current folder: %r", os.getcwd())
    logging.info("alqnet plugins: %r", plugin_enable)
    logging.info("apex available: %r", apex_enable)
    logging.info("dali available: %r", dali_enable)
    for x in vars(args):
        logging.info("config %s: %r", x, getattr(args, x))

    torch.manual_seed(args.seed)
    if torch.cuda.is_available() and len(args.device_ids) > 0:
        args.device_ids = [x for x in args.device_ids
                           if 0 <= x < torch.cuda.device_count()]
        if len(args.device_ids) == 0:
            args.device_ids = None
        else:
            logging.info("training on %d gpu", len(args.device_ids))
    else:
        args.device_ids = None

    if args.device_ids is not None:
        torch.cuda.manual_seed_all(args.seed)
        cudnn.benchmark = True
        torch.backends.cudnn.deterministic = True  # https://github.com/pytorch/pytorch/issues/8019
    else:
        logging.info("no gpu available, try CPU version, lots of functions limited")
        # return

    if model_name in models.model_zoo:
        model, args = models.get_model(args)
    else:
        logging.error("model(%s) not supported, available models: %r"
                      % (model_name, models.model_zoo))
        return

    criterion = nn.CrossEntropyLoss()
    if 'label-smooth' in args.keyword:
        criterion_smooth = utils.CrossEntropyLabelSmooth(args.num_classes,
                                                         args.label_smooth)

    # load policy for initial phase
    models.policy.deploy_on_init(model, getattr(args, 'policy', ''))
    # load policy for epoch updating
    epoch_policies = models.policy.read_policy(getattr(args, 'policy', ''),
                                               section='epoch')

    # print model
    logging.info("models: %r" % model)
    logging.info("epoch_policies: %r" % epoch_policies)

    utils.check_folder(args.weights_dir)
    args.weights_dir = os.path.join(args.weights_dir, model_name)
    utils.check_folder(args.weights_dir)
    args.resume_file = os.path.join(args.weights_dir, args.case + "-" + args.resume_file)
    args.pretrained = os.path.join(args.weights_dir, args.pretrained)

    epoch = 0
    lr = args.lr
    best_acc = 0
    scheduler = None
    checkpoint = None

    # resume training
    if args.resume:
        if utils.check_file(args.resume_file):
            logging.info("resuming from %s" % args.resume_file)
            if torch.cuda.is_available():
                checkpoint = torch.load(args.resume_file)
            else:
                checkpoint = torch.load(args.resume_file, map_location='cpu')
            if 'epoch' in checkpoint:
                epoch = checkpoint['epoch']
                logging.info("resuming ==> last epoch: %d" % epoch)
                epoch = epoch + 1
                logging.info("updating ==> epoch: %d" % epoch)
            if 'best_acc' in checkpoint:
                best_acc = checkpoint['best_acc']
                logging.info("resuming ==> best_acc: %f" % best_acc)
            if 'learning_rate' in checkpoint:
                lr = checkpoint['learning_rate']
                logging.info("resuming ==> learning_rate: %f" % lr)
            if 'state_dict' in checkpoint:
                utils.load_state_dict(model, checkpoint['state_dict'])
                logging.info("resumed from %s" % args.resume_file)
        else:
            logging.info("warning: *** resume file does not exist({})".format(
                args.resume_file))
            args.resume = False
    else:
        if utils.check_file(args.pretrained):
            logging.info("load pretrained from %s" % args.pretrained)
            if torch.cuda.is_available():
                checkpoint = torch.load(args.pretrained)
            else:
                checkpoint = torch.load(args.pretrained, map_location='cpu')
            logging.info("load pretrained ==> last epoch: %d" % checkpoint.get('epoch', 0))
            logging.info("load pretrained ==> last best_acc: %f" % checkpoint.get('best_acc', 0))
            logging.info("load pretrained ==> last learning_rate: %f"
                         % checkpoint.get('learning_rate', 0))
            # if 'learning_rate' in checkpoint:
            #     lr = checkpoint['learning_rate']
            #     logging.info("resuming ==> learning_rate: %f" % lr)
            try:
                utils.load_state_dict(
                    model,
                    checkpoint.get('state_dict', checkpoint.get('model', checkpoint)))
            except RuntimeError as err:
                logging.info("Loading pretrained model failed %r" % err)
        else:
            logging.info("no pretrained file exists({}), init model with default initializer"
                         .format(args.pretrained))

    if args.device_ids is not None:
        torch.cuda.set_device(args.device_ids[0])
        if not isinstance(model, nn.DataParallel) and len(args.device_ids) > 1:
            model = nn.DataParallel(model, args.device_ids).cuda()
        else:
            model = model.cuda()
        criterion = criterion.cuda()
        if 'label-smooth' in args.keyword:
            criterion_smooth = criterion_smooth.cuda()

    if 'label-smooth' in args.keyword:
        train_criterion = criterion_smooth
    else:
        train_criterion = criterion

    # move after to_cuda() for speedup
    if args.re_init and not args.resume:
        for m in model.modules():
            if hasattr(m, 'init_after_load_pretrain'):
                m.init_after_load_pretrain()

    # dataset
    data_path = args.root
    dataset = args.dataset
    logging.info("loading dataset with batch_size {} and val-batch-size {}. "
                 "dataset: {}, resolution: {}, path: {}".format(
                     args.batch_size, args.val_batch_size, dataset,
                     args.input_size, data_path))

    if args.val_batch_size < 1:
        val_loader = None
    else:
        if args.evaluate:
            val_batch_size = (args.batch_size // 100) * 100
            if val_batch_size > 0:
                args.val_batch_size = val_batch_size
            logging.info("update val_batch_size to %d in evaluate mode"
                         % args.val_batch_size)
        val_loader = datasets.data_loader(args.dataset)('val', args)

    if args.evaluate and val_loader is not None:
        if args.fp16 and torch.backends.cudnn.enabled and apex_enable \
                and args.device_ids is not None:
            logging.info("training with apex fp16 at opt_level {}".format(args.opt_level))
        else:
            args.fp16 = False
            logging.info("training without apex")
        if args.fp16:
            optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                         weight_decay=args.weight_decay)
            # model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level)
        logging.info("evaluate the dataset on pretrained model...")
        result = validate(val_loader, model, criterion, args)
        top1, top5, loss = result
        logging.info('evaluate accuracy on dataset: top1(%f) top5(%f)' % (top1, top5))
        return

    train_loader = datasets.data_loader(args.dataset)('train', args)
    if isinstance(train_loader, torch.utils.data.dataloader.DataLoader):
        train_length = len(train_loader)
    else:
        train_length = getattr(train_loader, '_size', 0) / getattr(train_loader, 'batch_size', 1)

    # sample several iterations per epoch to calculate the initial values
    # of the quantization parameters
    if args.stable_epoch > 0 and args.stable <= 0:
        args.stable = train_length * args.stable_epoch
        logging.info("update stable: %d" % args.stable)

    # fix the learning rate at the beginning to warm up
    if args.warmup_epoch > 0 and args.warmup <= 0:
        args.warmup = train_length * args.warmup_epoch
        logging.info("update warmup: %d" % args.warmup)

    params_dict = dict(model.named_parameters())
    params = []
    quant_wrapper = []
    for key, value in params_dict.items():
        # print(key)
        if 'quant_weight' in key and 'quant_weight' in args.custom_lr_list:
            to_be_quant = key.split('.quant_weight')[0] + '.weight'
            if to_be_quant not in quant_wrapper:
                quant_wrapper += [to_be_quant]
    if len(quant_wrapper) > 0 and args.verbose:
        logging.info("quant_wrapper: {}".format(quant_wrapper))

    for key, value in params_dict.items():
        shape = value.shape
        custom_hyper = dict()
        custom_hyper['params'] = value
        if not value.requires_grad:
            continue

        found = False
        for i in args.custom_decay_list:
            if i in key and len(i) > 0:
                found = True
                break
        if found:
            custom_hyper['weight_decay'] = args.custom_decay
        elif (not args.decay_small and args.no_decay_small) and (
                (len(shape) == 4 and shape[1] == 1) or (len(shape) == 1)):
            custom_hyper['weight_decay'] = 0.0

        found = False
        for i in args.custom_lr_list:
            if i in key and len(i) > 0:
                found = True
                break
        if found:
            # custom_hyper.setdefault('lr_constant', args.custom_lr)  # 2019.11.25
            custom_hyper['lr'] = args.custom_lr
        elif key in quant_wrapper:
            custom_hyper.setdefault('lr_constant', args.custom_lr)
            custom_hyper['lr'] = args.custom_lr

        params += [custom_hyper]

        if 'debug' in args.keyword:
            logging.info("{}, decay {}, lr {}, constant {}".format(
                key,
                custom_hyper.get('weight_decay', "default"),
                custom_hyper.get('lr', "default"),
                custom_hyper.get('lr_constant', "No")))

    optimizer = None
    if args.optimizer == "ADAM":
        optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)
    if args.optimizer == "SGD":
        optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum,
                                    weight_decay=args.weight_decay, nesterov=args.nesterov)

    if args.resume and checkpoint is not None:
        try:
            optimizer.load_state_dict(checkpoint['optimizer'])
        except RuntimeError as error:
            logging.info("Restore optimizer state failed %r" % error)

    if args.fp16 and torch.backends.cudnn.enabled and apex_enable \
            and args.device_ids is not None:
        logging.info("training with apex fp16 at opt_level {}".format(args.opt_level))
    else:
        args.fp16 = False
        logging.info("training without apex")

    if args.sync_bn:
        logging.info("sync_bn to be supported, currently not yet")

    if args.fp16:
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level)
        if args.resume and checkpoint is not None:
            try:
                amp.load_state_dict(checkpoint['amp'])
            except RuntimeError as error:
                logging.info("Restore amp state failed %r" % error)

    # start tensorboard as late as possible
    if args.tensorboard and not args.evaluate:
        tb_log = os.path.join(args.log_dir, log_suffix)
        args.tensorboard = SummaryWriter(tb_log, filename_suffix='.' + log_suffix)
    else:
        args.tensorboard = None

    logging.info("start to train network " + model_name + ' with case ' + args.case)
    while epoch < (args.epochs + args.extra_epoch):
        if 'proxquant' in args.keyword:
            if args.proxquant_step < 10:
                if args.lr_policy in ['sgdr', 'sgdr_step', 'custom_step']:
                    index = len([x for x in args.lr_custom_step if x <= epoch])
                    for m in model.modules():
                        if hasattr(m, 'prox'):
                            m.prox = 1.0 - 1.0 / args.proxquant_step * (index + 1)
            else:
                for m in model.modules():
                    if hasattr(m, 'prox'):
                        m.prox = 1.0 - 1.0 / args.proxquant_step * epoch
                        if m.prox < 0:
                            m.prox = 0

        if epoch < args.epochs:
            lr, scheduler = utils.setting_learning_rate(optimizer, epoch, train_length,
                                                        checkpoint, args, scheduler)
        if lr is None:
            logging.info('lr is invalid at epoch %d' % epoch)
            return
        else:
            logging.info('[epoch %d]: lr %e', epoch, lr)

        loss = 0
        top1, top5, eloss = 0, 0, 0
        is_best = top1 > best_acc

        # leverage policies on epoch
        models.policy.deploy_on_epoch(model, epoch_policies, epoch,
                                      optimizer=optimizer, verbose=logging.info)

        if 'lr-test' not in args.keyword:  # otherwise only print the learning rate each epoch
            # training
            loss = train(train_loader, model, train_criterion, optimizer, args,
                         scheduler, epoch, lr)
            # for i in range(train_length):
            #     scheduler.step()
            logging.info('[epoch %d]: train_loss %.3f' % (epoch, loss))

            # validate
            top1, top5, eloss = 0, 0, 0
            top1, top5, eloss = validate(val_loader, model, criterion, args)
            is_best = top1 > best_acc
            if is_best:
                best_acc = top1
            logging.info('[epoch %d]: test_acc %f %f, best top1: %f, loss: %f',
                         epoch, top1, top5, best_acc, eloss)

        if args.tensorboard is not None:
            args.tensorboard.add_scalar(log_suffix + '/train-loss', loss, epoch)
            args.tensorboard.add_scalar(log_suffix + '/eval-top1', top1, epoch)
            args.tensorboard.add_scalar(log_suffix + '/eval-top5', top5, epoch)
            args.tensorboard.add_scalar(log_suffix + '/lr', lr, epoch)

        utils.save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': None if scheduler is None else scheduler.state_dict(),
                'best_acc': best_acc,
                'learning_rate': lr,
                'amp': None if not args.fp16 else amp.state_dict(),
            }, is_best, args)

        epoch = epoch + 1
        if epoch == 1:
            logging.info(utils.gpu_info())
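# The parameter loop above builds one dict per tensor so that weight decay and
# learning rate can be customized per parameter. A minimal, self-contained
# version of that per-parameter-group pattern (the tiny model here is
# hypothetical, purely for illustration):
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
params = []
for name, p in model.named_parameters():
    group = {'params': p}
    if p.dim() == 1:  # e.g. BN weights/biases: exclude from weight decay
        group['weight_decay'] = 0.0
    params.append(group)
# keys not set in a group fall back to the optimizer defaults below
optimizer = torch.optim.SGD(params, lr=0.1, momentum=0.9, weight_decay=1e-4)
print([g['weight_decay'] for g in optimizer.param_groups])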
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()

    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers
    train_loader = CyclicIterator(train_loader, train_sampler)
    # valid_loader = CyclicIterator(valid_loader, valid_sampler, False)

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes, config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join('', config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)

    param_size = utils.param_size(model, criterion, [3, 512, 512])
    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyperparameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif param_size < 3.5:  # the original "3.0 < param_size < 3.5" left exactly 3.0 unhandled
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.AdamW(model.parameters(), config.lr)
    # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs, eta_min=1E-6)

    best_top1 = 0.
    epoch = 0
    try:
        checkpoint = torch.load(config.model_checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
        loss = checkpoint['loss']
        model.eval()
        print("----------------------------")
        print("MODEL LOADED FROM CHECKPOINT " + config.model_checkpoint)
        print("----------------------------")
    except (OSError, KeyError, RuntimeError):  # narrowed from a bare except
        print("----------------------------")
        print("MODEL NOT LOADED FROM CHECKPOINT")
        print("----------------------------")

    # for epoch in range(0, epoch):
    #     lr_scheduler.step()
    for epoch in range(epoch, config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)

        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch, main_proc)

        if epoch % config.log_frequency == 0:
            # validation
            top1 = validate(logger, config, valid_loader, model, criterion, epoch, main_proc)
            best_top1 = max(best_top1, top1)
        # lr_scheduler.step()

    logger.info("Final best Prec@1 = %.4f", best_top1)
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device %s' % args.gpu)
    logging.info('small regime or regular regime %s' % args.regime)
    logging.info("args = %s", args)

    # read graph info
    if args.regime:
        graph_info_dict3 = read_graph_info(args.graph_txt + "3.txt")
        graph_info_dict4 = read_graph_info(args.graph_txt + "4.txt")
        graph_info_dict5 = read_graph_info(args.graph_txt + "5.txt")
        graph_info_dicts = [graph_info_dict3, graph_info_dict4, graph_info_dict5]
    else:
        graph_info_dict2 = read_graph_info(args.graph_txt + "2.txt")
        graph_info_dict3 = read_graph_info(args.graph_txt + "3.txt")
        graph_info_dict4 = read_graph_info(args.graph_txt + "4.txt")
        graph_info_dict5 = read_graph_info(args.graph_txt + "5.txt")
        graph_info_dicts = [graph_info_dict2, graph_info_dict3,
                            graph_info_dict4, graph_info_dict5]

    writer = SummaryWriter(log_dir=args.save)

    # CrossEntropyLabelSmooth for train, CrossEntropyLoss for val
    criterion_smooth = utils.CrossEntropyLabelSmooth(NUM_CLASSES, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    model = RandomlyWiredNN(args.base_channels, NUM_CLASSES, args.output_channels,
                            args.regime, graph_info_dicts)
    x = torch.randn(2, 3, 224, 224)
    writer.add_graph(model, (x,))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    logging.info("FLOPs = %fMB",
                 utils.model_param_flops_in_MB(model, input_res=[224, 224],
                                               multiply_adds=False))

    if args.parallel:
        model = nn.DataParallel(model).cuda()
        print("multi GPUs")
    else:
        model = model.cuda()
        print("single GPU")

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_queue, valid_queue = preprocess_imagenet(args)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc_top1 = 0.0
    best_acc_top5 = 0.0
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        train_acc_top1, train_acc_top5, train_obj, train_speed = train(
            train_queue, model, criterion_smooth, optimizer)
        logging.info('train_acc %f', train_acc_top1)
        logging.info('train_speed_per_image %f', train_speed)
        writer.add_scalar('train_loss', train_obj, epoch)

        valid_acc_top1, valid_acc_top5, valid_obj, valid_speed = infer(
            valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc_top1)
        logging.info('valid_speed_per_image %f', valid_speed)
        writer.add_scalar('val_loss', valid_obj, epoch)

        if valid_acc_top1 > best_acc_top1:
            best_acc_top1 = valid_acc_top1
            utils.save_parm(model, os.path.join(args.save, 'model_top1.pt'))
        if valid_acc_top5 > best_acc_top5:
            best_acc_top5 = valid_acc_top5
            utils.save_parm(model, os.path.join(args.save, 'model_top5.pt'))

    writer.close()
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, config.cutout_length,
        validation=True, autoaugment=config.autoaugment)

    if config.label_smooth != 0:
        # use the dataset's class count; the original hard-coded 10 here,
        # which only holds for 10-class datasets
        criterion = utils.CrossEntropyLabelSmooth(n_classes,
                                                  config.label_smooth).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    use_aux = config.aux_weight > 0.
    if config.dataset in utils.LARGE_DATASETS:
        model = AugmentCNNImageNet(input_size, input_channels, config.init_channels,
                                   n_classes, config.layers, use_aux, config.genotype)
    else:
        model = AugmentCNN(input_size, input_channels, config.init_channels,
                           n_classes, config.layers, use_aux, config.genotype,
                           SSC=config.SSC)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    if config.p != 1:
        # base lr 1.0: the LambdaLR factor below then equals the learning rate itself
        optimizer = torch.optim.SGD(model.parameters(), 1.,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(), config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)

    if config.p == 1:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)
    else:
        lr_cpa = utils.cosine_power_annealing_lr(nepochs=config.epochs,
                                                 min_lr=config.lr_min,
                                                 max_lr=config.lr,
                                                 p=config.p)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [lr_cpa])

    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%} for job {}".format(best_top1, config.name))
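# A hedged sketch of what utils.cosine_power_annealing_lr might look like; its
# definition is not shown in this section. Cosine power annealing (as in
# sharpDARTS) pushes the cosine decay through a power curve, and the result is
# exposed here as a callable so it can be handed to LambdaLR. Since the
# optimizer above uses a base lr of 1.0 when config.p != 1, the LambdaLR factor
# is the learning rate itself. Names and the exact formula are assumptions.
import numpy as np
import torch


def cosine_power_annealing_lr(nepochs, min_lr, max_lr, p):
    """Return f(epoch) -> lr, decaying from max_lr to min_lr over nepochs."""
    def lr_at(epoch):
        cos = 0.5 * (1 + np.cos(np.pi * epoch / max(1, nepochs - 1)))  # in [0, 1]
        # map through a power curve, normalized back to [0, 1]
        factor = (p ** cos - 1) / (p - 1) if p != 1 else cos
        return min_lr + (max_lr - min_lr) * factor
    return lr_at


if __name__ == '__main__':
    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.SGD(params, lr=1.0)  # base lr 1.0, as in the snippet above
    lr_cpa = cosine_power_annealing_lr(nepochs=10, min_lr=0.001, max_lr=0.1, p=2)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [lr_cpa])
    for epoch in range(10):
        print(epoch, scheduler.get_last_lr()[0])
        optimizer.step()
        scheduler.step()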
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)
    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=config.cutout_length,
                     validation=True,
                     auto_aug=config.auto_aug)
    train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.num_workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.num_workers,
                                               pin_memory=True)

    logger.info("parsing genotypes...")
    genotypes = parse_genotypes()
    logger.info(genotypes)

    logger.info("building model...")
    model = AugmentCNN(input_size=input_size,
                       channels_in=channels_in,
                       channels_init=config.init_channels,
                       num_cells=config.num_cells,
                       num_nodes=config.num_nodes,
                       num_classes=num_classes,
                       stem_multiplier=3,
                       auxiliary=(config.aux_weight > 0),
                       genotypes=genotypes,
                       alpha_share=config.alpha_share)
    model = model.to(device)
    model_size = utils.param_size(model)
    logger.info("model_size: {:.3f} MB".format(model_size))

    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(num_classes, config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    optimizer = torch.optim.SGD(params=model.parameters(),
                                lr=config.learning_rate,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)
    if config.power_lr:
        lr_scheduler = utils.CosinePowerAnnealingLR(optimizer=optimizer,
                                                    T_max=config.epochs, p=2)
    else:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer,
                                                                  T_max=config.epochs)

    logger.info("start training...")
    history_top1 = []
    best_top1 = 0.0
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]
        logger.info("epoch: {:d}, lr: {:e}".format(epoch, lr))

        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.drop_path_prob(drop_prob)

        train(train_loader, model, criterion, optimizer, epoch)

        global_step = (epoch + 1) * len(train_loader) - 1
        valid_top1 = valid(valid_loader, model, criterion, epoch, global_step)
        history_top1.append(valid_top1)

        if epoch == 0 or best_top1 < valid_top1:
            best_top1 = valid_top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.model_dir, is_best=is_best)

    with open(os.path.join(config.stage_dir, "history_top1.pk"), "wb") as f:
        pickle.dump(history_top1, f)

    logger.info("Final best valid Prec@1: {:.4%}".format(best_top1))
    logger.info("*** Finish {} ***".format(config.stage))