def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    torch.backends.cudnn.benchmark = True
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    model = CNN(args)
    model.cuda()
    controller = Controller(args)
    controller.cuda()
    baseline = None

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.child_lr_max,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )
    controller_optimizer = torch.optim.Adam(
        controller.parameters(),
        args.controller_lr,
        betas=(0.1, 0.999),
        eps=1e-3,
    )

    train_loader, reward_loader, valid_loader = get_loaders(args)
    scheduler = utils.LRScheduler(optimizer, args)

    # zychen
    param_calculator = ParamCalculation(args.param_target)

    for epoch in range(args.epochs):
        lr = scheduler.update(epoch)
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        train_acc = train(train_loader, model, controller, optimizer)
        logging.info('train_acc %f', train_acc)
        train_controller(reward_loader, model, controller,
                         controller_optimizer, param_calculator)

        # validation
        valid_acc = infer(valid_loader, model, controller, param_calculator)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True
    print("args = %s" % args)

    model = Child(args)
    model.cuda()
    controller = Controller()
    controller.cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=0.05,
                                momentum=0.9, weight_decay=1e-4)
    controller_optimizer = torch.optim.Adam(controller.parameters(),
                                            lr=0.0035,
                                            betas=(0.1, 0.999),
                                            eps=1e-3)
    scheduler = utils.LRScheduler(optimizer, args)

    for epoch in range(args.epochs):
        lr = scheduler.update(epoch)
        print('epoch', epoch, lr)

        # training
        train_acc = train(model, controller, optimizer)
        print('train_acc', train_acc)
        train_controller(model, controller, controller_optimizer)

        # validation
        valid_acc = infer(model, controller)
        print('valid_acc', valid_acc)

        torch.save(model.state_dict(), 'weights.pth')
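# ---------------------------------------------------------------------------
# Both drivers above depend on a `utils.LRScheduler` whose code is not shown.
# Below is a minimal sketch of a cosine-annealing variant matching the
# `LRScheduler(optimizer, args).update(epoch)` usage above; the real helper's
# decay policy may differ, and the `epochs` argument here is an assumption.
# ---------------------------------------------------------------------------
import math


class CosineLRScheduler:
    """Anneal every param group's LR from its initial value towards zero."""

    def __init__(self, optimizer, epochs):
        self.optimizer = optimizer
        self.epochs = epochs
        # Remember each group's starting LR so updates are not cumulative.
        self.base_lrs = [g['lr'] for g in optimizer.param_groups]

    def update(self, epoch):
        # Standard cosine schedule: lr = base * 0.5 * (1 + cos(pi * t / T)).
        scale = 0.5 * (1 + math.cos(math.pi * epoch / self.epochs))
        for group, base_lr in zip(self.optimizer.param_groups, self.base_lrs):
            group['lr'] = base_lr * scale
        return self.optimizer.param_groups[0]['lr']


# Example usage, mirroring the training loops above:
#   scheduler = CosineLRScheduler(optimizer, args.epochs)
#   lr = scheduler.update(epoch)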
def __init__(self, config):
    self.rank, self.world_size = 0, 1
    if config['dist']:
        self.rank = dist.get_rank()
        self.world_size = dist.get_world_size()
    self.config = config
    self.mode = config['dgp_mode']
    self.custom_mask = config['custom_mask']
    self.update_G = config['update_G']
    self.update_embed = config['update_embed']
    self.iterations = config['iterations']
    self.ftr_num = config['ftr_num']
    self.ft_num = config['ft_num']
    self.lr_ratio = config['lr_ratio']
    self.G_lrs = config['G_lrs']
    self.z_lrs = config['z_lrs']
    self.use_in = config['use_in']
    self.select_num = config['select_num']
    self.factor = 4  # Downsample factor
    self.mask_path = config['mask_path']

    # create the selective mask: load the mask image, resize it to the
    # working resolution, then binarize it at a fixed threshold
    if self.custom_mask:
        self.mask = torch.ones(1, 1, 256, 256).cuda()
        x = Image.open(self.mask_path)
        pil_to_tensor = transforms.ToTensor()(x).unsqueeze_(0)
        t = torch.tensor([0.9])  # binarization threshold
        final_mask = F.interpolate(pil_to_tensor, size=(256, 256),
                                   mode='bilinear')
        self.mask = (final_mask > t).float()
        self.mask = self.mask[0][0].cuda()
        self.regions = self.get_regions(self.mask)

    # create model
    self.G = models.Generator(**config).cuda()
    self.D = (models.Discriminator(**config).cuda()
              if config['ftr_type'] == 'Discriminator' else None)
    self.G.optim = torch.optim.Adam(
        [{'params': self.G.get_params(i, self.update_embed)}
         for i in range(len(self.G.blocks) + 1)],
        lr=config['G_lr'],
        betas=(config['G_B1'], config['G_B2']),
        weight_decay=0,
        eps=1e-8)

    # load weights
    if config['random_G']:
        self.random_G()
    else:
        utils.load_weights(self.G if not config['use_ema'] else None,
                           self.D,
                           config['weights_root'],
                           name_suffix=config['load_weights'],
                           G_ema=self.G if config['use_ema'] else None,
                           strict=False)
    self.G.eval()
    if self.D is not None:
        self.D.eval()
    self.G_weight = deepcopy(self.G.state_dict())

    # prepare latent variable and optimizer
    self._prepare_latent()
    # prepare learning rate schedulers
    self.G_scheduler = utils.LRScheduler(self.G.optim, config['warm_up'])
    self.z_scheduler = utils.LRScheduler(self.z_optim, config['warm_up'])

    # loss functions
    self.mse = torch.nn.MSELoss()
    if config['ftr_type'] == 'Discriminator':
        self.ftr_net = self.D
        self.criterion = utils.DiscriminatorLoss(ftr_num=config['ftr_num'][0])
    else:
        vgg = torchvision.models.vgg16(pretrained=True).cuda().eval()
        self.ftr_net = models.subsequence(vgg.features, last_layer='20')
        self.criterion = utils.PerceptLoss()

    # Downsampler for producing the low-resolution image
    self.downsampler = Downsampler(n_planes=3,
                                   factor=self.factor,
                                   kernel_type='lanczos2',
                                   phase=0.5,
                                   preserve_size=True).type(
                                       torch.cuda.FloatTensor)
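# ---------------------------------------------------------------------------
# Standalone sketch of the selective-mask preprocessing in the constructor
# above, so the resize/threshold step can be sanity-checked in isolation. The
# path, size, and threshold defaults are placeholders; DGP's actual mask
# handling is the constructor code above.
# ---------------------------------------------------------------------------
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms


def load_binary_mask(mask_path, size=256, threshold=0.9):
    # Read the mask image and add a batch dimension: (1, C, H, W).
    mask = transforms.ToTensor()(Image.open(mask_path)).unsqueeze(0)
    # Resize to the working resolution, then binarize at the threshold.
    mask = F.interpolate(mask, size=(size, size), mode='bilinear',
                         align_corners=False)
    return (mask > threshold).float()[0][0]  # (size, size) tensor of 0s/1s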
def main():
    global args, best_prec1, best_prec5
    global rank, world_size

    args = parser.parse_args()

    if args.distribute:
        import multiprocessing as mp
        if mp.get_start_method(allow_none=True) != 'spawn':
            mp.set_start_method('spawn', force=True)
        rank, world_size = dist_init(args.port)
    else:
        rank, world_size = 0, 1
    assert args.batch_size % world_size == 0
    assert args.workers % world_size == 0
    args.batch_size = args.batch_size // world_size
    args.workers = args.workers // world_size

    if rank == 0 and not os.path.isdir(os.path.dirname(args.save_path)):
        os.makedirs(os.path.dirname(args.save_path))

    # switchable whitening (SW) config
    sw_cfg = dict(type='SW',
                  sw_type=args.sw_type,
                  num_pergroup=args.num_pergroup,
                  T=args.T,
                  tie_weight=args.tie_weight,
                  momentum=0.9,
                  affine=True)

    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch.startswith('inception'):
        print('inception_v3 without aux_logits!')
        image_size = 341
        input_size = 299
        model = models.__dict__[args.arch](pretrained=args.pretrain)
    else:
        image_size = 256
        input_size = 224
        model = models.__dict__[args.arch](
            pretrained=args.pretrain,
            sw_cfg=sw_cfg if args.use_sw else None)
    if rank == 0:
        print(model)
        print('    Total params: %.2fM' %
              (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.cuda()
    if args.distribute:
        model = DistModule(model)
    else:
        model = torch.nn.DataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.load_path:
        if args.resume_opt:
            best_prec1, best_prec5, args.start_epoch = utils.load_state(
                args.load_path, model, optimizer=optimizer)
        else:
            utils.load_state(args.load_path, model)

    torch.cuda.empty_cache()
    cudnn.benchmark = True

    # data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(image_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ]))

    train_sampler = (DistributedSampler(train_dataset)
                     if args.distribute else None)
    val_sampler = DistributedSampler(val_dataset) if args.distribute else None

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=(train_sampler is None),
                              num_workers=args.workers,
                              pin_memory=False,
                              sampler=train_sampler)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.workers,
                            pin_memory=False,
                            sampler=val_sampler)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    niters = len(train_loader)
    lr_scheduler = utils.LRScheduler(optimizer, niters, args)

    if rank == 0:
        tb_logger = SummaryWriter(args.save_path + '/events')
    else:
        tb_logger = None

    for epoch in range(args.start_epoch, args.epochs):
        # adjust_learning_rate(optimizer, epoch)
        if train_sampler is not None:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        prec1_train, loss_train = train(train_loader, model, criterion,
                                        optimizer, lr_scheduler, epoch)

        # evaluate on validation set
        prec1, prec5, loss_val = validate(val_loader, model, criterion)

        if rank == 0:
            # tb
            tb_logger.add_scalar('loss_train', loss_train, epoch)
            tb_logger.add_scalar('acc1_train', prec1_train, epoch)
            tb_logger.add_scalar('loss_test', loss_val, epoch)
            tb_logger.add_scalar('acc1_test', prec1, epoch)

            # remember best prec@1 / prec@5 and save checkpoint
            is_best1 = prec1 > best_prec1
            is_best5 = prec5 > best_prec5
            best_prec1 = max(prec1, best_prec1)
            best_prec5 = max(prec5, best_prec5)
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'prec1': prec1,
                    'prec5': prec5,
                    'optimizer': optimizer.state_dict(),
                }, is_best1, is_best5, args.save_path)
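# ---------------------------------------------------------------------------
# Here `utils.LRScheduler(optimizer, niters, args)` is stepped per iteration
# inside train() and its code is not shown. Below is a minimal sketch assuming
# linear warm-up followed by step decay; the warm-up length, decay epochs, and
# gamma are placeholder values, not fields read from the real args.
# ---------------------------------------------------------------------------
class WarmupStepLRScheduler:

    def __init__(self, optimizer, niters, base_lr,
                 warmup_epochs=5, decay_epochs=(30, 60, 90), gamma=0.1):
        self.optimizer = optimizer
        self.niters = niters  # iterations per epoch
        self.base_lr = base_lr
        self.warmup_iters = warmup_epochs * niters
        self.decay_epochs = decay_epochs
        self.gamma = gamma

    def update(self, it):
        # `it` is the global iteration index across all epochs.
        if it < self.warmup_iters:
            # Ramp linearly from ~0 up to base_lr over the warm-up window.
            lr = self.base_lr * (it + 1) / self.warmup_iters
        else:
            # Multiply by gamma once for each decay boundary already passed.
            epoch = it // self.niters
            lr = self.base_lr * self.gamma ** sum(
                epoch >= e for e in self.decay_epochs)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        return lr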
def __init__(self, config):
    self.rank, self.world_size = 0, 1
    if config['dist']:
        self.rank = dist.get_rank()
        self.world_size = dist.get_world_size()
    self.config = config
    self.mode = config['dgp_mode']
    self.update_G = config['update_G']
    self.update_embed = config['update_embed']
    self.iterations = config['iterations']
    self.ftr_num = config['ftr_num']
    self.ft_num = config['ft_num']
    self.lr_ratio = config['lr_ratio']
    self.G_lrs = config['G_lrs']
    self.z_lrs = config['z_lrs']
    self.use_in = config['use_in']
    self.select_num = config['select_num']
    self.factor = 2 if self.mode == 'hybrid' else 4  # Downsample factor

    # create model
    self.G = models.Generator(**config).cuda()
    self.D = (models.Discriminator(**config).cuda()
              if config['ftr_type'] == 'Discriminator' else None)
    self.G.optim = torch.optim.Adam(
        [{'params': self.G.get_params(i, self.update_embed)}
         for i in range(len(self.G.blocks) + 1)],
        lr=config['G_lr'],
        betas=(config['G_B1'], config['G_B2']),
        weight_decay=0,
        eps=1e-8)

    # load weights
    if config['random_G']:
        self.random_G()
    else:
        utils.load_weights(self.G if not config['use_ema'] else None,
                           self.D,
                           config['weights_root'],
                           name_suffix=config['load_weights'],
                           G_ema=self.G if config['use_ema'] else None,
                           strict=False)
    self.G.eval()
    if self.D is not None:
        self.D.eval()
    self.G_weight = deepcopy(self.G.state_dict())

    # prepare latent variable and optimizer
    self._prepare_latent()
    # prepare learning rate schedulers
    self.G_scheduler = utils.LRScheduler(self.G.optim, config['warm_up'])
    self.z_scheduler = utils.LRScheduler(self.z_optim, config['warm_up'])

    # loss functions
    self.mse = torch.nn.MSELoss()
    if config['ftr_type'] == 'Discriminator':
        self.ftr_net = self.D
        self.criterion = utils.DiscriminatorLoss(ftr_num=config['ftr_num'][0])
    else:
        vgg = torchvision.models.vgg16(pretrained=True).cuda().eval()
        self.ftr_net = models.subsequence(vgg.features, last_layer='20')
        self.criterion = utils.PerceptLoss()

    # Downsampler for producing the low-resolution image
    self.downsampler = Downsampler(n_planes=3,
                                   factor=self.factor,
                                   kernel_type='lanczos2',
                                   phase=0.5,
                                   preserve_size=True).type(
                                       torch.cuda.FloatTensor)
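# ---------------------------------------------------------------------------
# Note that each snippet above calls utils.LRScheduler with a different
# signature: (optimizer, args), (optimizer, niters, args), and here
# (optim, warm_up), so these are repo-specific helpers rather than one shared
# class. Below is a minimal sketch matching the DGP-style (optim, warm_up)
# usage; the update() signature is an assumption, and the real scheduler may
# also swap in per-stage target LRs such as G_lrs/z_lrs.
# ---------------------------------------------------------------------------
class WarmupLRScheduler:

    def __init__(self, optimizer, warm_up):
        self.optimizer = optimizer
        self.warm_up = max(warm_up, 1)  # warm-up length in iterations

    def update(self, it, target_lr):
        # Ramp linearly from 0 to target_lr over the warm-up window, then hold.
        lr = target_lr * min((it + 1) / self.warm_up, 1.0)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        return lr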