def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # loss function
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    number_of_classes = class_dict[args.dataset]
    in_channels = inp_channel_dict[args.dataset]
    model = Network(args.init_channels, number_of_classes, args.layers,
                    criterion, in_channels)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Get transforms to apply on data
    train_transform, valid_transform = utils.get_data_transforms(args)

    # Get the training queue
    train_queue, valid_queue = get_training_queues(args, train_transform)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
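# The search loops above and below call a train()/infer() pair that is not shown
# in this section. The following is only a minimal sketch of the usual DARTS-style
# train() step, written against the interfaces these scripts assume: an Architect
# with a step(input, target, input_search, target_search, lr, optimizer, unrolled)
# method, a model whose forward returns logits, a module-level `args`, and the
# utils.AvgrageMeter / utils.accuracy helpers from the reference DARTS repository.
# Individual forks change this signature, so treat it as illustrative, not canonical.
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # one architecture update, driven by a batch from the validation split
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)
        architect.step(input, target, input_search, target_search, lr,
                       optimizer, unrolled=args.unrolled)

        # one weight update on the training split
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, = utils.accuracy(logits, target, topk=(1,))
        objs.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

    return top1.avg, objs.avg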
def main():
    # if not torch.cuda.is_available():
    #     logging.info('no gpu device available')
    #     sys.exit(1)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # logging.info('gpu device = %d' % args.gpu)
    # logging.info("args = %s", args)

    criterion = nn.MSELoss()
    # criterion = criterion.cuda()
    model = Network(args.network_inputsize, args.network_outputsize,
                    args.max_width, args.max_depth, criterion)
    # model = model.cuda()

    optimizer = torch.optim.Adam(
        model.parameters(),
        args.learning_rate,
        # momentum=args.momentum,
        weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    plt.ion()
    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        # lr = args.learning_rate
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.w_alpha, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr, epoch)
        logging.info('train_acc %f', train_acc)
        scheduler.step()

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        plt.draw()
        plt.pause(0.1)
    plt.ioff()
    plt.show()
def init_model(self):
    self.model = Network(self.args.init_channels, CIFAR_CLASSES,
                         self.args.layers, self.criterion, self.args,
                         self.rank, self.world_size, self.args.steps,
                         self.args.multiplier)
    self.model.to(self.device)
    for v in self.model.parameters():
        if v.requires_grad:
            if v.grad is None:
                v.grad = torch.zeros_like(v)
    self.model.normal_log_alpha.grad = torch.zeros_like(
        self.model.normal_log_alpha)
    self.model.reduce_log_alpha.grad = torch.zeros_like(
        self.model.reduce_log_alpha)
def main(args):
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    train_reader, valid_reader = reader.train_search(
        batch_size=args.batch_size,
        train_portion=0.5,
        is_shuffle=True,
        args=args)

    with fluid.dygraph.guard(place):
        model = Network(args.init_channels, args.class_num, args.layers,
                        args.method)
        searcher = DARTSearch(
            model,
            train_reader,
            valid_reader,
            place,
            learning_rate=args.learning_rate,
            batchsize=args.batch_size,
            num_imgs=args.trainset_num,
            arch_learning_rate=args.arch_learning_rate,
            unrolled=args.unrolled,
            num_epochs=args.epochs,
            epochs_no_archopt=args.epochs_no_archopt,
            use_multiprocess=args.use_multiprocess,
            use_data_parallel=args.use_data_parallel,
            save_dir=args.model_save_dir,
            log_freq=args.log_freq)
        searcher.train()
def init_model(self):
    self.model = Network(self.args.init_channels, CIFAR_CLASSES,
                         self.args.layers, self.criterion, self.args,
                         self.rank, self.world_size)
    self.model.to(self.device)
    if self.args.distributed:
        broadcast_params(self.model)
    for v in self.model.parameters():
        if v.requires_grad:
            if v.grad is None:
                v.grad = torch.zeros_like(v)
    self.model.normal_log_alpha.grad = torch.zeros_like(
        self.model.normal_log_alpha)
    self.model.reduce_log_alpha.grad = torch.zeros_like(
        self.model.reduce_log_alpha)
def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    logging.info("args = %s", args)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader, valid_loader = utils.search_dataloader(args, kwargs)

    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(device, nodes=2).to(device)
    logging.info(
        "param size = %fMB",
        np.sum(np.prod(v.size()) for name, v in model.named_parameters()) / 1e6)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=0.9,
                                weight_decay=args.weight_decay)
    architect = Architect(model)

    for epoch in range(args.epochs):
        logging.info("Starting epoch %d/%d", epoch + 1, args.epochs)

        # training
        train_acc, train_obj = train(train_loader, valid_loader, model,
                                     architect, criterion, optimizer, device)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_loader, model, criterion, device)
        logging.info('valid_acc %f', valid_acc)

        # compute the discrete architecture from the current alphas
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        with open(args.save + '/architecture', 'w') as f:
            f.write(str(genotype))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled=True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) dataset = Dataset(args.dataset) train_examples = torch.from_numpy(dataset.get_train().astype('int64')) valid_examples = torch.from_numpy(dataset.get_valid().astype('int64')) CLASSES = dataset.get_shape()[0] criterion = nn.CrossEntropyLoss(reduction='mean') #criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth) criterion = criterion.cuda() regularizer = { 'N2': N2(args.reg), 'N3': N3(args.reg), }[args.regularizer] model = Network(args.channels, CLASSES, args.layers, criterion, regularizer, args.interleaved, dataset.get_shape(), args.emb_dim, args.init, args.steps) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) #model = utils.load(model, 'search-EXP-20190823-173036%f/weights.pt') weights = torch.load('search-EXP-20190823-173036%f/weights.pt') #print(weights) embeddings = [weights['embeddings.0.weight'], weights['embeddings.1.weight']] torch.save(embeddings, 'search-EXP-20190823-173036%f/embeddings.pt')
def _build_model(self):
    self.model = nn.DataParallel(
        Network(C=args.init_channels, num_classes=10,
                layer=args.layers)).cuda()
    self.loss_func = torch.nn.CrossEntropyLoss().cuda()
    self.opt = torch.optim.SGD(self.model.parameters(),
                               lr=args.lr,
                               momentum=args.momentum,
                               weight_decay=args.wd)
    self.opt_sche = torch.optim.lr_scheduler.CosineAnnealingLR(
        self.opt, float(args.epochs), eta_min=args.m_lr)
def __init__(self, trial_context: PyTorchTrialContext) -> None:
    self.context = trial_context
    self.data_config = trial_context.get_data_config()
    self.hparams = AttrDict(trial_context.get_hparams())
    self.last_epoch = 0
    self.data_dir = os.path.join(
        self.data_config["download_dir"],
        f"data-rank{self.context.distributed.get_rank()}",
    )

    # Initialize the models.
    criterion = nn.CrossEntropyLoss()
    self.model = self.context.wrap_model(
        Network(
            self.hparams.init_channels,
            self.hparams.n_classes,
            self.hparams.layers,
            criterion,
            self.hparams.nodes,
            k=self.hparams.shuffle_factor,
        ))

    # Initialize the optimizers and learning rate scheduler.
    self.ws_opt = self.context.wrap_optimizer(
        torch.optim.SGD(
            self.model.ws_parameters(),
            self.hparams.learning_rate,
            momentum=self.hparams.momentum,
            weight_decay=self.hparams.weight_decay,
        ))
    self.arch_opt = self.context.wrap_optimizer(
        EG(
            self.model.arch_parameters(),
            self.hparams.arch_learning_rate,
            lambda p: p / p.sum(dim=-1, keepdim=True),
        ))
    self.lr_scheduler = self.context.wrap_lr_scheduler(
        lr_scheduler=CosineAnnealingLR(
            self.ws_opt,
            self.hparams.scheduler_epochs,
            self.hparams.min_learning_rate,
        ),
        step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
    )
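# The arch_opt above is constructed as EG(params, lr, normalizer), i.e. an
# exponentiated-gradient optimizer whose third argument re-projects each
# architecture parameter onto the simplex. The class below is only a sketch of
# such an optimizer consistent with that call signature; the EG actually used by
# this trial may differ in details.
class EG(torch.optim.Optimizer):
    def __init__(self, params, lr, normalizer):
        defaults = dict(lr=lr)
        super().__init__(params, defaults)
        self.normalizer = normalizer

    @torch.no_grad()
    def step(self, closure=None):
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None:
                    continue
                # multiplicative update, then project back onto the simplex
                p.mul_(torch.exp(-group["lr"] * p.grad))
                p.copy_(self.normalizer(p))
        return loss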
def load_state_dict(model: Network, state_dict, strict=True):
    """Copies parameters and buffers from :attr:`state_dict` into this module
    and its descendants.

    If :attr:`strict` is ``True`` then the keys of :attr:`state_dict` must
    exactly match the keys returned by this module's :func:`state_dict`
    function.

    Arguments:
        model (Network): The module to copy the parameters into.
        state_dict (dict): A dict containing parameters and persistent buffers.
        strict (bool): Strictly enforce that the keys in :attr:`state_dict`
            match the keys returned by this module's :func:`state_dict`
            function.
    """
    own_state = model.state_dict()
    for name, param in state_dict.items():
        if name in own_state:
            if isinstance(param, torch.nn.Parameter):
                # backwards compatibility for serialized parameters
                param = param.detach()
            try:
                own_state[name].copy_(param)
            except Exception:
                raise RuntimeError(
                    'While copying the parameter named {}, '
                    'whose dimensions in the model are {} and '
                    'whose dimensions in the checkpoint are {}.'.format(
                        name, own_state[name].size(), param.size()))
        elif strict:
            raise KeyError('unexpected key "{}" in state_dict'.format(name))
    if strict:
        missing = set(own_state.keys()) - set(state_dict.keys())
        if len(missing) > 0:
            raise KeyError('missing keys in state_dict: "{}"'.format(missing))
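# Example of how the helper above might be used (the paths and constructor
# arguments are illustrative, not taken from this repository): load
# search-phase weights into a freshly built Network while tolerating missing
# or renamed keys.
model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion).cuda()
checkpoint = torch.load(os.path.join(args.save, 'weights.pt'), map_location='cuda')
load_state_dict(model, checkpoint, strict=False)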
def _build_model(self):
    self.model = nn.DataParallel(
        Network(C=args.init_channels, num_classes=10,
                layer=args.layers)).cuda()
    self.loss_func = torch.nn.CrossEntropyLoss().cuda()
    if args.mode == 0:
        self.opt = torch.optim.SGD(self.model.parameters(),
                                   lr=args.lr,
                                   momentum=args.momentum,
                                   weight_decay=args.wd)
        self.opt_sche = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.opt, float(args.epochs), eta_min=args.m_lr)
    elif args.mode in [1, 2, 3]:
        alpha_sign = 'w_alpha'
        self.w_opt = torch.optim.SGD(
            [p[1] for p in self.model.named_parameters()
             if p[0].find(alpha_sign) < 0],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.wd)
        self.alpha_opt = torch.optim.Adam(
            [p[1] for p in self.model.named_parameters()
             if p[0].find(alpha_sign) >= 0],
            lr=args.arch_lr,
            betas=(0.5, 0.999),
            weight_decay=args.arch_lr)
        self.opt_sche = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.w_opt, float(args.epochs), eta_min=args.m_lr)
    else:
        print('invalid search mode.')
        sys.exit(0)
def _construct_model_from_theta(self, theta):
    model_clone = Network(self.model._C, self.model._num_classes,
                          self.model._layers, self.model._criterion).cuda()
    for x, y in zip(model_clone.arch_parameters(),
                    self.model.arch_parameters()):
        x.data.copy_(y.data)

    model_dict = self.model.state_dict()
    params, offset = {}, 0
    for k, v in self.model.named_parameters():
        v_length = np.prod(v.size())
        params[k] = theta[offset:offset + v_length].view(v.size())
        offset += v_length

    assert offset == len(theta)
    model_dict.update(params)
    model_clone.load_state_dict(model_dict)
    return model_clone.cuda()
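# In the reference DARTS Architect, _construct_model_from_theta() is consumed by
# the unrolled (second-order) architecture step: the weights take one virtual SGD
# step and the resulting flat vector theta is rebuilt into a model. The method
# below is a sketch along the lines of that reference implementation; it assumes
# self.network_momentum and self.network_weight_decay are set in Architect.__init__,
# and it would live on the same Architect class as the method above.
def _compute_unrolled_model(self, input, target, eta, network_optimizer):
    loss = self.model._loss(input, target)
    theta = torch.cat([v.view(-1) for v in self.model.parameters()]).data
    try:
        # momentum buffer of the weight optimizer, if it has been populated
        moment = torch.cat([
            network_optimizer.state[v]['momentum_buffer'].view(-1)
            for v in self.model.parameters()
        ]).mul_(self.network_momentum)
    except KeyError:
        moment = torch.zeros_like(theta)
    dtheta = torch.cat([
        g.view(-1) for g in torch.autograd.grad(loss, self.model.parameters())
    ]).data + self.network_weight_decay * theta
    # w' = w - eta * (momentum + dL_train/dw + weight_decay * w)
    return self._construct_model_from_theta(theta.sub(moment + dtheta, alpha=eta))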
def main(args):
    global log
    log = logging.getLogger("train_search")

    CIFAR_CLASSES = 10
    if args.set == 'cifar100':
        CIFAR_CLASSES = 100

    if not torch.cuda.is_available():
        log.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    log.info('gpu device = %d' % args.gpu)
    log.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    log.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, _ = utils._data_transforms_cifar10(args)
    if args.set == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                                  transform=train_transform)

    targets = train_data.targets
    train_idx = np.arange(len(targets))
    if args.subsample > 0:
        train_idx, _ = train_test_split(train_idx,
                                        test_size=1 - args.subsample,
                                        shuffle=True,
                                        stratify=targets)

    num_train = len(train_idx)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            train_idx[indices[:split]]),
        pin_memory=True,
        num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            train_idx[indices[split:num_train]]),
        pin_memory=True,
        num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs, eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    train_acc = None
    valid_acc = None
    l1_loss = torch.zeros(1)
    l2_loss = torch.zeros(1)
    criterion_loss = torch.zeros(1)

    genotype = model.genotype()
    log.info('initial genotype = %s', genotype)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        log.info('epoch %d lr %e', epoch, lr)
        # model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        # training
        train_acc, train_obj, l1_loss, l2_loss, criterion_loss = train(
            train_queue, valid_queue, model, architect, criterion, optimizer,
            lr, epoch, args.grad_clip, args.report_lines, args.unrolled,
            args.criterion_weight, args.l1_weight, args.l2_weight)
        scheduler.step()
        log.info('train_acc %f', train_acc)
        log.info('%s %f', L1_LOSS, l1_loss)
        log.info('%s %f', L2_LOSS, l2_loss)
        log.info('criterion_loss %f', criterion_loss)

        # validation
        if args.epochs - epoch <= 1:
            valid_acc, valid_obj = infer(valid_queue, model, criterion,
                                         args.report_lines)
            log.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        genotype = model.genotype()
        log.info('genotype = %s', genotype)

    log.info('last genotype = %s', genotype)
    model = TrainNetwork(36, CIFAR_CLASSES, 20, False, genotype)
    model_size_mb = utils.count_parameters_in_MB(model)
    log.info("Train model param size = %.2fMB", model_size_mb)

    return {
        L1_LOSS: {
            tuple([args.l1_weight, args.criterion_weight]): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l1_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        },
        L2_LOSS: {
            tuple([args.l2_weight, args.criterion_weight]): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l2_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        }
    }
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) if args.loss_func == 'cce': criterion = nn.CrossEntropyLoss().cuda() elif args.loss_func == 'rll': criterion = utils.RobustLogLoss().cuda() else: assert False, "Invalid loss function '{}' given. Must be in {'cce', 'rll'}".format( args.loss_func) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) # Load dataset if args.gold_fraction == 0: train_data = CIFAR10(root=args.data, train=True, gold=False, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) if args.clean_valid: gold_train_data = CIFAR10(root=args.data, train=True, gold=True, gold_fraction=1.0, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) else: train_data = CIFAR10(root=args.data, train=True, gold=True, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) if args.clean_valid: valid_queue = torch.utils.data.DataLoader( gold_train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:]), pin_memory=True, num_workers=2) else: valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() KD_loss = kd_loss.KDLoss(args.temp) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, KD_loss) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) if not args.cls: print('not cls') trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset( args.dataset, args.dataroot, batch_size=args.batch_size) else: trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset( args.dataset, args.dataroot, 'pair', batch_size=args.batch_size) print(len(trainloader)) print(len(infer_pair_loader)) print(len(valloader)) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(trainloader, infer_pair_loader, model, architect, criterion, KD_loss, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(infer_random_loader, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
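# The search above plugs a knowledge-distillation term, kd_loss.KDLoss(args.temp),
# into the Network alongside the cross-entropy criterion. That module is not shown
# here; the class below is only a minimal sketch of a temperature-scaled KD loss
# of the usual Hinton form, and the project's actual KDLoss may differ.
class KDLoss(nn.Module):
    def __init__(self, temperature):
        super(KDLoss, self).__init__()
        self.T = temperature

    def forward(self, student_logits, teacher_logits):
        # KL divergence between softened student and teacher distributions,
        # rescaled by T^2 so gradients keep a comparable magnitude
        log_p = F.log_softmax(student_logits / self.T, dim=1)
        q = F.softmax(teacher_logits / self.T, dim=1)
        return F.kl_div(log_p, q, reduction='batchmean') * (self.T ** 2)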
def main():
    path_to_best_loss_eval = "./generator/best_loss_model_{}.csv".format(args.seed)
    path_to_best_model = "./generator/best_model_{}.pth".format(args.seed)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # ================= DONAS ==========================
    low_flops = args.low_flops
    high_flops = args.high_flops

    nodes, edges = model.get_arch_param_nums()
    lookup_table = LookUpTable(edges, nodes)
    arch_param_nums = nodes * edges

    generator = get_generator(20)
    generator = generator.cuda()

    backbone_pool = BackbonePool(nodes, edges, lookup_table, arch_param_nums)
    backbone = backbone_pool.get_backbone((low_flops + high_flops) / 2)

    g_optimizer = torch.optim.Adam(generator.parameters(),
                                   weight_decay=0,
                                   lr=0.001,
                                   betas=(0.5, 0.999))

    tau = 5
    best_hc_loss = 100000
    best_top1 = 0  # track the best validation accuracy seen so far
    # ================= DONAS ==========================

    architect = Architect(model, generator, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     low_flops, high_flops, backbone, tau)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, generator,
                                     backbone, (low_flops + high_flops) // 2,
                                     lookup_table)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        evalulate_metric, total_loss, kendall_tau = evalulate_generator(
            generator, backbone, lookup_table, low_flops, high_flops)
        if total_loss < best_hc_loss:
            logger.log("Best hc loss : {}. Save model!".format(total_loss))
            save_generator_evaluate_metric(evalulate_metric, path_to_best_loss_eval)
            best_hc_loss = total_loss

        if valid_acc > best_top1:
            logger.log("Best top1-avg : {}. Save model!".format(valid_acc))
            save_model(generator, path_to_best_model)
            best_top1 = valid_acc

        tau *= 0.95
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, k=args.k) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.dataset == 'cifar100': train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) architect = Architect(model, args) # configure progressive parameter epoch = 0 ks = [6, 4] num_keeps = [7, 4] train_epochs = [2, 2] if 'debug' in args.save else [25, 25] scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min) for i, current_epochs in enumerate(train_epochs): for e in range(current_epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) model.show_arch_parameters() # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, e) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) epoch += 1 scheduler.step() utils.save(model, os.path.join(args.save, 'weights.pt')) if not i == len(train_epochs) - 1: model.pruning(num_keeps[i + 1]) # architect.pruning([model.mask_normal, model.mask_reduce]) model.wider(ks[i + 1]) optimizer = configure_optimizer( optimizer, torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)) scheduler = configure_scheduler( scheduler, torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)) logging.info('pruning finish, %d ops left per edge', num_keeps[i + 1]) logging.info('network wider finish, current pc parameter %d', ks[i + 1]) genotype = model.genotype() logging.info('genotype = %s', genotype) model.show_arch_parameters()
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10_simple( args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) # adversarial testing adv_acc, adv_obj = infer_minibatch(valid_queue, model, criterion) logging.info('adv_acc %f', adv_acc) #infer_minibatch(valid_queue, model, criterion) utils.save(model, os.path.join(args.save, 'weights_' + str(epoch) + '.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)

    # prepare dataset
    train_transform, valid_transform = utils.data_transforms(
        args.dataset, args.cutout, args.cutout_length)
    if args.dataset == "CIFAR100":
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True,
                                   download=True, transform=train_transform)
    elif args.dataset == "CIFAR10":
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True,
                                  download=True, transform=train_transform)
    elif args.dataset == 'mit67':
        dset_cls = dset.ImageFolder
        data_path = '%s/MIT67/train' % args.tmp_data_dir  # 'data/MIT67/train'
        val_path = '%s/MIT67/test' % args.tmp_data_dir  # 'data/MIT67/val'
        train_data = dset_cls(root=data_path, transform=train_transform)
        valid_data = dset_cls(root=val_path, transform=valid_transform)
    elif args.dataset == 'sport8':
        dset_cls = dset.ImageFolder
        data_path = '%s/Sport8/train' % args.tmp_data_dir  # 'data/Sport8/train'
        val_path = '%s/Sport8/test' % args.tmp_data_dir  # 'data/Sport8/val'
        train_data = dset_cls(root=data_path, transform=train_transform)
        valid_data = dset_cls(root=val_path, transform=valid_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    random.shuffle(indices)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    if len(args.add_width) == 3:
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:
        add_layers = args.add_layers
    else:
        add_layers = [0, 3, 6]
    if len(args.dropout_rate) == 3:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]

    for sp in range(len(num_to_keep)):
        model = Network(args.init_channels + int(add_width[sp]), CLASSES,
                        args.layers + int(add_layers[sp]), criterion,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]),
                        largemode=args.dataset in utils.LARGE_DATASETS)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)

        optimizer = torch.optim.SGD(
            network_params,
            args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(
            model.arch_parameters(),
            lr=args.arch_learning_rate,
            betas=(0.5, 0.999),
            weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2

        for epoch in range(epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()

            # training
            if epoch < eps_no_arch:
                model.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model,
                                             network_params, criterion,
                                             optimizer, optimizer_a, lr,
                                             train_arch=False)
            else:
                model.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
                model.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model,
                                             network_params, criterion,
                                             optimizer, optimizer_a, lr,
                                             train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)

            # validation
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        print('------Dropping %d paths------' % num_to_drop[sp])
        # Save switches info for s-c refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        # drop operations with low architecture weights
        arch_param = model.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False

        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False

        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:
            arch_param = model.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]

            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0] == True:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0] == True:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])

            # Generate Architecture
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1

            for i in range(14):
                if not i in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if not i in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False

            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)

            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
            for sks in range(0, len(PRIMITIVES) + 1):
                max_sk = len(PRIMITIVES) - sks
                num_sk = check_sk_number(switches_normal)
                if num_sk < max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal,
                                                     switches_normal_2,
                                                     normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)

            with open(args.save + "/best_genotype.txt", "w") as f:
                f.write(str(genotype))
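# The pruning stages above rely on get_min_k()/get_min_k_no_zero() helpers that are
# not shown in this section. A sketch of the simpler one, in the spirit of P-DARTS
# (return the indices of the k smallest architecture weights on one edge), is:
def get_min_k(input_in, k):
    values = list(input_in)
    index = []
    for _ in range(k):
        idx = int(np.argmin(values))
        index.append(idx)
        values[idx] = float('inf')  # exclude this entry from the next round
    return index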
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) # torch.cuda.set_device(args.gpu) gpus = [int(i) for i in args.gpu.split(',')] if len(gpus) == 1: torch.cuda.set_device(int(args.gpu)) # cudnn.benchmark = True torch.manual_seed(args.seed) # cudnn.enabled=True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %s' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() if len(gpus)>1: print("True") model = nn.parallel.DataParallel(model, device_ids=gpus, output_device=gpus[0]) model = model.module arch_params = list(map(id, model.arch_parameters())) weight_params = filter(lambda p: id(p) not in arch_params, model.parameters()) logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( # model.parameters(), weight_params, args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) #optimizer = nn.DataParallel(optimizer, device_ids=gpus) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, criterion, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation with torch.no_grad(): valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main(): start_time = time.time() if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) torch.cuda.set_device(args.gpu) reproducibility(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.model_name, CIFAR_CLASSES, sub_policies, args.use_cuda, args.use_parallel, temperature=args.temperature, criterion=criterion) # model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) # train_transform, valid_transform = utils._data_transforms_cifar10(args) # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) # train_data = AugmCIFAR10( # root=args.data, train=True, download=True, # transform=train_transform, ops_names=sub_policies, search=True, magnitudes=model.magnitudes) # valid_data = AugmCIFAR10( # root=args.data, train=True, download=True, # transform=train_transform, ops_names=sub_policies, search=False, magnitudes=model.magnitudes) # num_train = len(train_data) # indices = list(range(num_train)) # split = int(np.floor(args.train_portion * num_train)) # train_queue = torch.utils.data.DataLoader( # train_data, batch_size=args.batch_size, # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), # pin_memory=True, num_workers=args.num_workers) # valid_queue = torch.utils.data.DataLoader( # valid_data, batch_size=args.batch_size, # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), # pin_memory=True, num_workers=args.num_workers) train_queue, valid_queue = get_dataloaders(args.dataset, args.batch_size, args.num_workers, args.dataroot, sub_policies, model.magnitudes, args.cutout, args.cutout_length, split=args.train_portion, split_idx=0, target_lb=-1) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() # logging.info('genotype = %s', genotype) print_genotype(genotype) # logging.info('%s' % str(torch.nn.functional.softmax(model.ops_weights, dim=-1))) probs = model.ops_weights # logging.info('%s' % str(probs / probs.sum(-1, keepdim=True))) logging.info('%s' % str(torch.nn.functional.softmax(probs, dim=-1))) logging.info('%s' % str(model.probabilities.clamp(0, 1))) logging.info('%s' % str(model.magnitudes.clamp(0, 1))) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) end_time = time.time() elapsed = end_time - start_time logging.info('elapsed time: %.3f Hours' % (elapsed / 3600.))
def nas(args: Namespace, task: Task, preprocess_func: Compose) -> Module:
    '''Network Architecture Search method.

    Given a task and a preprocess function, this method returns a model found
    by NAS. The implementation of DARTS is available at
    https://github.com/alphadl/darts.pytorch1.1
    '''
    # TODO: Replace model with the output by NAS
    args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CLASSES = task.n_classes

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    # gpus = [int(args.gpu)]
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))
    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model, device_ids=gpus,
                                         output_device=gpus[0])
        model = model.module

    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params,
                           model.parameters())
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        # model.parameters(),
        weight_params,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    optimizer = nn.DataParallel(optimizer, device_ids=gpus)

    if task.name == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True,
                                   transform=preprocess_func)
        # train_transform, valid_transform = utils._data_transforms_cifar10(args)
        # train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    elif task.name == 'cifar10':
        train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                                  transform=preprocess_func)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.module, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(args, train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(args, valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    # return a neural network model (torch.nn.Module)
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model = NetworkClassification(36, task.n_classes, 20, False, genotype)
    return model
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    # prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True,
                                   download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True,
                                  download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # eps_no_archs = [10, 10, 10]
    eps_no_archs = [2, 2, 2]

    for sp in range(len(num_to_keep)):
        # if sp < 1:
        #     continue
        model = Network(args.init_channels + int(add_width[sp]),
                        CIFAR_CLASSES, args.layers + int(add_layers[sp]),
                        criterion, switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = nn.DataParallel(model)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)

        optimizer = torch.optim.SGD(network_params,
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        # optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
        #     lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
        optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                       lr=args.arch_learning_rate,
                                       betas=(0, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        # cur_sub_model = get_cur_model(model, switches_normal, switches_reduce, num_to_keep, num_to_drop, sp)

        for epoch in range(epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()

            # training
            if epoch < eps_no_arch:
                # if 0:
                model.module.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model,
                                             network_params, criterion,
                                             optimizer, optimizer_a, lr,
                                             train_arch=False)
            else:
                model.module.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model,
                                             network_params, criterion,
                                             optimizer, optimizer_a, lr,
                                             train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)

            # validation
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        print('------Dropping %d paths------' % num_to_drop[sp])
        # Save switches info for s-c refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False

        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False

        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:
            arch_param = model.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]

            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0] == True:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0] == True:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])

            # Generate Architecture, similar to DARTS
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                # keep the two incoming edges with the largest weights
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1

            # set switches according the ranking of arch parameters
            for i in range(14):
                if not i in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if not i in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False

            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)

            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generating genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal,
                                                     switches_normal_2,
                                                     normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    if args.random_seed:
        # draw a scalar seed so torch.manual_seed() below accepts it
        args.seed = int(np.random.randint(0, 1000))
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # dataset: ModelNet10 point clouds
    pre_transform, transform = T.NormalizeScale(), T.SamplePoints(args.num_points)
    train_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'), '10',
                                     True, transform, pre_transform)
    train_queue = DenseDataLoader(train_dataset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.batch_size // 2)
    test_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'), '10',
                                    False, transform, pre_transform)
    valid_queue = DenseDataLoader(test_dataset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.batch_size // 2)
    n_classes = train_queue.dataset.num_classes

    criterion = torch.nn.CrossEntropyLoss().cuda()
    model = Network(args.init_channels, n_classes, args.num_cells, criterion,
                    args.n_steps, in_channels=args.in_channels,
                    emb_dims=args.emb_dims, dropout=args.dropout, k=args.k).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    num_edges = model._steps * 2
    post_train = 5
    args.epochs = args.warmup_dec_epoch + args.decision_freq * (num_edges - 1) + post_train + 1
    logging.info("total epochs: %d", args.epochs)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    # edge-decision bookkeeping: -1 means the edge is still undecided
    normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1],
                                        requires_grad=False, dtype=torch.int).cuda()
    normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True],
                                          requires_grad=False, dtype=torch.bool).cuda()
    logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs))
    logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags))
    model.normal_selected_idxs = normal_selected_idxs
    model.normal_candidate_flags = normal_candidate_flags

    print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach())

    count = 0
    normal_probs_history = []
    train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter()
    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        att = model.show_att()
        beta = model.show_beta()
        train_acc, train_losses = train(train_queue, valid_queue, model, architect,
                                        criterion, optimizer, lr, train_losses)
        valid_overall_acc, valid_class_acc, valid_losses = infer(
            valid_queue, model, criterion, valid_losses)

        logging.info('train_acc %f\tvalid_overall_acc %f \t valid_class_acc %f',
                     train_acc, valid_overall_acc, valid_class_acc)
        logging.info('beta %s', beta.cpu().detach().numpy())
        logging.info('att %s', att.cpu().detach().numpy())

        # make edge decisions
        saved_memory_normal, model.normal_selected_idxs, \
            model.normal_candidate_flags = edge_decision('normal',
                                                         model.alphas_normal,
                                                         model.normal_selected_idxs,
                                                         model.normal_candidate_flags,
                                                         normal_probs_history,
                                                         epoch, model, args)

        if saved_memory_normal:
            # an edge was fixed, so memory is freed; rebuild the loaders with a larger batch
            del train_queue, valid_queue
            torch.cuda.empty_cache()
            count += 1
            new_batch_size = args.batch_size + args.batch_increase * count
            logging.info("new_batch_size = {}".format(new_batch_size))
            train_queue = DenseDataLoader(train_dataset, batch_size=new_batch_size,
                                          shuffle=True, num_workers=args.batch_size // 2)
            valid_queue = DenseDataLoader(test_dataset, batch_size=new_batch_size,
                                          shuffle=False, num_workers=args.batch_size // 2)

            # post validation
            if args.post_val:
                post_valid_overall_acc, post_valid_class_acc, valid_losses = infer(
                    valid_queue, model, criterion, valid_losses)
                logging.info('post_valid_overall_acc %f', post_valid_overall_acc)

        writer.add_scalar('stats/train_acc', train_acc, epoch)
        writer.add_scalar('stats/valid_overall_acc', valid_overall_acc, epoch)
        writer.add_scalar('stats/valid_class_acc', valid_class_acc, epoch)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    logging.info("#" * 30 + " Done " + "#" * 30)
    logging.info('genotype = %s', model.get_genotype())
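# The sketch below is a simplified illustration of a greedy edge decision of the
# kind edge_decision() above performs: once an edge's softmaxed alphas are
# confident enough, fix that edge to its best operation and drop it from the
# candidate set. The margin-based criterion and the `threshold` parameter are
# assumptions for illustration, not the exact rule used above (which also
# tracks a probability history).
import torch
import torch.nn.functional as F

def greedy_edge_decision(alphas, selected_idxs, candidate_flags, threshold=0.3):
    # alphas: list of per-edge logits over operations, as in model.alphas_normal
    probs = F.softmax(torch.stack(list(alphas), dim=0), dim=-1)   # [num_edges, num_ops]
    # score each undecided edge by the margin between its top-2 operations
    top2 = probs.topk(2, dim=-1).values
    scores = top2[:, 0] - top2[:, 1]
    scores[~candidate_flags] = -1.0
    edge = int(scores.argmax())
    if scores[edge] < threshold:
        return False, selected_idxs, candidate_flags      # no decision this epoch
    selected_idxs[edge] = int(probs[edge].argmax())       # fix the chosen operation
    candidate_flags[edge] = False
    return True, selected_idxs, candidate_flags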
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary,
                    args.eta_min, args.reg_flops, args.mu)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # separate optimizers for architecture parameters (alpha) and weights (omega)
    optimizer_alpha = torch.optim.SGD(model.arch_parameters(),
                                      args.learning_rate_alpha,
                                      momentum=args.momentum,
                                      weight_decay=args.weight_decay_alpha)
    optimizer_omega = torch.optim.SGD(model.parameters(),
                                      args.learning_rate_omega,
                                      momentum=args.momentum,
                                      weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)
    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                              shuffle=True, pin_memory=True, num_workers=2)

    epoch = 0
    flops_lambda = 0
    flops_lambda_delta = args.lambda0
    finished = False
    t = 0
    while not finished:
        epoch_start = time.time()
        lr = args.learning_rate_omega
        model.drop_path_prob = 0
        logging.info('epoch %d lr %e flops_weight %e', epoch, lr, flops_lambda)

        train_acc, train_obj = train(train_queue, model, criterion,
                                     optimizer_alpha, optimizer_omega, flops_lambda)
        logging.info('train_acc %f', train_acc)
        epoch_duration = time.time() - epoch_start
        logging.info('epoch time: %ds.', epoch_duration)

        pruning_epoch = prune_op(model, args)
        current_flops = model.current_flops() + args.base_flops
        logging.info('current model flops %e', current_flops)

        # adapt the FLOPs penalty weight depending on how much was pruned
        if pruning_epoch >= args.pruning_n0:
            flops_lambda_delta = args.lambda0
            flops_lambda = flops_lambda / args.c0
        else:
            flops_lambda_delta = flops_lambda_delta * args.c0
            flops_lambda = flops_lambda + flops_lambda_delta

        if current_flops < args.min_flops:
            finished = True

        if pruning_epoch == 0:
            t = t + 1
        else:
            if t > args.stable_round:
                genotype = model.genotype()
                logging.info('genotype = %s', genotype)
            t = 0

        epoch += 1
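# A hedged sketch of how a FLOPs penalty weighted by flops_lambda (as adapted
# above) is commonly folded into the training objective. `expected_flops`
# stands for a differentiable estimate of the architecture's FLOPs (e.g.
# per-operation FLOPs weighted by softmaxed architecture parameters); it is an
# assumption for illustration, not the exact term used by train() above.
def flops_regularized_loss(logits, target, criterion, expected_flops, flops_lambda):
    # cross-entropy plus a FLOPs penalty scaled by the current flops_lambda
    return criterion(logits, target) + flops_lambda * expected_flops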
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    # criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer,
                    args.interleaved, dataset.get_shape(), args.emb_dim,
                    args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                                   betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()
    # TODO: can momentum=args.momentum and weight_decay=args.weight_decay be reintroduced?

    train_queue = torch.utils.data.DataLoader(train_examples, batch_size=args.batch_size,
                                              shuffle=True, pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(valid_examples, batch_size=args.batch_size,
                                              shuffle=True, pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc = 0
    patience = 0
    curve = {'valid': [], 'test': []}
    architect = Architect(model, args)

    for epoch in range(args.epochs):
        model.epoch = epoch
        print('model temperature param', 1.05 ** model.epoch)
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax((1.05 ** epoch) * model.alphas_normal, dim=-1))

        train_epoch(train_examples, train_queue, valid_queue, model, architect,
                    criterion, optimizer, regularizer, args.batch_size,
                    args.learning_rate)

        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            print("\t VALID: ", valid)
            print("\t TEST: ", test)

            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience += 1
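# A small, self-contained illustration of the 1.05**epoch factor used above:
# multiplying the architecture logits by a growing temperature sharpens the
# softmax over operations as the search progresses. The example alphas are
# made up for demonstration only.
import torch
import torch.nn.functional as F

alphas = torch.tensor([0.2, 0.1, -0.1])
for epoch in (0, 20, 50):
    temp = 1.05 ** epoch
    print(epoch, F.softmax(temp * alphas, dim=-1))
# early epochs give a near-uniform distribution; later epochs concentrate
# probability mass on the operation with the largest alpha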
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer for the network weights w
    optimizer = torch.optim.SGD(
        model.parameters(),        # the parameters updated here are the weights w
        args.learning_rate,        # initial value 0.025; annealed per epoch by the cosine schedule
        momentum=args.momentum,    # 0.9
        weight_decay=args.weight_decay)  # regularization, 3e-4

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        # custom sampling strategy: with train_portion=0.5 the first half of the data trains w
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        # the second half of the data is used for validation
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    # cosine annealing: the learning rate of each parameter group changes every epoch
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # architect that updates the architecture parameters alpha
    architect = Architect(model, args)

    # the search finishes after args.epochs (50) epochs
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]  # learning rate for this epoch
        logging.info('epoch %d lr %e', epoch, lr)

        # as in Sec. 2.4 of the paper: keep the two predecessors with the largest
        # weights per node and record the (operation, predecessor) pairs
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
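# A minimal sketch of the alternating update that a train() step in this style
# of search usually performs per batch: first update the architecture
# parameters alpha on a held-out batch via architect.step(), then update the
# weights w on a training batch. The argument order of architect.step(), the
# unrolled flag, and the grad_clip value follow the common DARTS implementation
# and are assumptions here, not necessarily the exact train() defined elsewhere
# in these scripts.
import torch

def train_step(model, architect, criterion, optimizer, lr,
               train_batch, valid_batch, grad_clip=5.0):
    x_train, y_train = train_batch
    x_valid, y_valid = valid_batch

    # 1) architecture step: update alpha using the validation batch
    architect.step(x_train, y_train, x_valid, y_valid, lr, optimizer, unrolled=False)

    # 2) weight step: update w using the training batch
    optimizer.zero_grad()
    loss = criterion(model(x_train), y_train)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer.step()
    return loss.item()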
def main():
    args.exp_path /= f'{args.gpu}_{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(Path(args.exp_path), scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.exp_path / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.seed is None:
        raise Exception('designate seed.')
    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    # (disabled) optional GPU-memory pre-allocation based on
    # `nvidia-smi --query-gpu=memory.total,memory.used` output
    # ================================================

    logging.info(f'GPU device = {args.gpu}')
    logging.info(f'args = {args}')

    criterion = nn.CrossEntropyLoss().to(device)
    setting = args.location
    model = Network(args.init_ch, 10, args.layers, criterion, setting)

    checkpoint = None
    previous_epochs = 0
    if args.checkpoint_path:
        checkpoint = torch.load(args.checkpoint_path)
        utils.load(model, checkpoint['state_dict'], False)
        previous_epochs = checkpoint['epoch']
        args.epochs -= previous_epochs
        if args.epochs <= 0:
            raise Exception('args.epochs is too small.')

    if use_DataParallel:
        print('use Data Parallel')
        model = nn.parallel.DataParallel(model)
        model = model.cuda()
        module = model.module
        torch.cuda.manual_seed_all(args.seed)
    else:
        model = model.to(device)
        module = model

    param_size = utils.count_parameters_in_MB(model)
    logging.info(f'param size = {param_size}MB')

    # split parameters into architecture/attention parameters and weight parameters
    arch_and_attn_params = list(
        map(id, module.arch_and_attn_parameters()
            if use_DataParallel else model.arch_and_attn_parameters()))
    weight_params = filter(
        lambda p: id(p) not in arch_and_attn_params,
        module.parameters() if use_DataParallel else model.parameters())

    optimizer = optim.SGD(weight_params, args.lr,
                          momentum=args.momentum, weight_decay=args.wd)
    if checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=8)  # from 2
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=8)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs,
                                                     eta_min=args.lr_min)
    if checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler'])

    arch = Arch(model, criterion, args)
    if checkpoint:
        arch.optimizer.load_state_dict(checkpoint['arch_optimizer'])

    for epoch in tqdm(range(args.epochs), desc='Total Progress'):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info(f'\nEpoch: {epoch} lr: {lr}')

        gen = module.genotype()
        logging.info(f'Genotype: {gen}')

        print(F.softmax(module.alphas_normal, dim=-1))
        print(F.softmax(module.alphas_reduce, dim=-1))
        if module.betas_normal is not None:
            print(F.softmax(module.betas_normal, dim=-1))
            print(F.softmax(module.betas_reduce, dim=-1))
        if module.gammas_normal is not None:
            print(F.softmax(module.gammas_normal, dim=-1))
            print(F.softmax(module.gammas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch,
                                     criterion, optimizer, lr, epoch + 1)
        logging.info(f'train acc: {train_acc}')

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch + 1)
        logging.info(f'valid acc: {valid_acc}')

        utils.save(model, args.exp_path / 'search.pt')
        utils.save_checkpoint({
            'epoch': epoch + 1 + previous_epochs,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'arch_optimizer': arch.optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }, False, args.exp_path)

    gen = module.genotype()
    gen_path = args.exp_path / 'genotype.json'
    utils.save_genotype(gen, gen_path)
    logging.info(f'Result genotype: {gen}')
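# A minimal sketch of how the checkpoint dict written by utils.save_checkpoint()
# above could be restored to resume a search; the field names mirror the dict
# above, while the helper name and the map_location choice are assumptions.
import torch

def resume_search(model, optimizer, arch, scheduler, checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    arch.optimizer.load_state_dict(checkpoint['arch_optimizer'])
    scheduler.load_state_dict(checkpoint['scheduler'])
    return checkpoint['epoch']   # number of epochs already completed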
def main(pretrain=True):
    config.save = 'ckpt/{}'.format(config.save)
    create_exp_dir(config.save, scripts_to_save=glob.glob('*.py') + glob.glob('*.sh'))
    logger = SummaryWriter(config.save)

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(config.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    assert type(pretrain) == bool or type(pretrain) == str
    # during pretraining only the weights are trained; the architecture is frozen
    update_arch = True
    if pretrain == True:
        update_arch = False
    logging.info("args = %s", str(config))

    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Model #######################################
    model = Network(config.layers, slimmable=config.slimmable,
                    width_mult_list=config.width_mult_list,
                    width_mult_list_sh=config.width_mult_list_sh,
                    loss_weight=config.loss_weight,
                    prun_modes=config.prun_modes,
                    quantize=config.quantize)
    model = torch.nn.DataParallel(model).cuda()

    # (disabled) distillation teacher: Generator(3, 3) loaded from config.generator_A2B,
    # wrapped in DataParallel with its gradients frozen

    if type(pretrain) == str:
        # load only keys that exist in the current model with matching shapes
        partial = torch.load(pretrain + "/weights.pt")
        state = model.state_dict()
        pretrained_dict = {k: v for k, v in partial.items()
                           if k in state and state[k].size() == partial[k].size()}
        state.update(pretrained_dict)
        model.load_state_dict(state)
    # else:
    #     init_weight([model.module.stem, model.module.cells, model.module.header],
    #                 nn.init.kaiming_normal_, nn.InstanceNorm2d, config.bn_eps,
    #                 config.bn_momentum, mode='fan_in', nonlinearity='relu')

    architect = Architect(model, config)

    # Optimizer ###################################
    base_lr = config.lr
    parameters = []
    parameters += list(model.module.stem.parameters())
    parameters += list(model.module.cells.parameters())
    parameters += list(model.module.header.parameters())
    if config.opt == 'Adam':
        optimizer = torch.optim.Adam(parameters, lr=base_lr, betas=config.betas)
    elif config.opt == 'Sgd':
        optimizer = torch.optim.SGD(parameters, lr=base_lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    else:
        logging.info("Wrong Optimizer Type.")
        sys.exit()

    # lr policy ##############################
    total_iteration = config.nepochs * config.niters_per_epoch
    if config.lr_schedule == 'linear':
        lr_policy = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lr_lambda=LambdaLR(config.nepochs, 0, config.decay_epoch).step)
    elif config.lr_schedule == 'exponential':
        lr_policy = torch.optim.lr_scheduler.ExponentialLR(optimizer, config.lr_decay)
    elif config.lr_schedule == 'multistep':
        lr_policy = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=config.milestones, gamma=config.gamma)
    else:
        logging.info("Wrong Learning Rate Schedule Type.")
        sys.exit()

    # data loader ###########################
    transforms_ = [
        # transforms.Resize(int(config.image_height * 1.12), Image.BICUBIC),
        # transforms.RandomCrop(config.image_height),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]
    # (disabled) unaligned ImageDataset loaders for unpaired training were replaced
    # by the PairedImageDataset loaders below
    train_loader_model = DataLoader(
        PairedImageDataset(config.dataset_path, config.target_path,
                           transforms_=transforms_, portion=config.train_portion),
        batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
    train_loader_arch = DataLoader(
        PairedImageDataset(config.dataset_path, config.target_path,
                           transforms_=transforms_, portion=config.train_portion - 1),
        batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)

    transforms_ = [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]
    test_loader = DataLoader(
        ImageDataset(config.dataset_path, transforms_=transforms_, mode='test'),
        batch_size=1, shuffle=False, num_workers=config.num_workers)

    tbar = tqdm(range(config.nepochs), ncols=80)
    valid_fid_history = []
    flops_history = []
    flops_supernet_history = []
    best_fid = 1000
    best_epoch = 0

    for epoch in tbar:
        logging.info(pretrain)
        logging.info(config.save)
        logging.info("lr: " + str(optimizer.param_groups[0]['lr']))
        logging.info("update arch: " + str(update_arch))

        # training
        tbar.set_description("[Epoch %d/%d][train...]" % (epoch + 1, config.nepochs))
        train(pretrain, train_loader_model, train_loader_arch, model, architect,
              optimizer, lr_policy, logger, epoch, update_arch=update_arch)
        torch.cuda.empty_cache()
        lr_policy.step()

        # validation
        if epoch and not (epoch + 1) % config.eval_epoch:
            tbar.set_description("[Epoch %d/%d][validation...]" % (epoch + 1, config.nepochs))
            save(model, os.path.join(config.save, 'weights_%d.pt' % epoch))
            with torch.no_grad():
                if pretrain == True:
                    model.module.prun_mode = "min"
                    valid_fid = infer(epoch, model, test_loader, logger)
                    logger.add_scalar('fid/val_min', valid_fid, epoch)
                    logging.info("Epoch %d: valid_fid_min %.3f" % (epoch, valid_fid))
                    if len(model.module._width_mult_list) > 1:
                        model.module.prun_mode = "max"
                        valid_fid = infer(epoch, model, test_loader, logger)
                        logger.add_scalar('fid/val_max', valid_fid, epoch)
                        logging.info("Epoch %d: valid_fid_max %.3f" % (epoch, valid_fid))
                        model.module.prun_mode = "random"
                        valid_fid = infer(epoch, model, test_loader, logger)
                        logger.add_scalar('fid/val_random', valid_fid, epoch)
                        logging.info("Epoch %d: valid_fid_random %.3f" % (epoch, valid_fid))
                else:
                    model.module.prun_mode = None
                    valid_fid, flops = infer(epoch, model, test_loader, logger, finalize=True)
                    logger.add_scalar('fid/val', valid_fid, epoch)
                    logging.info("Epoch %d: valid_fid %.3f" % (epoch, valid_fid))
                    logger.add_scalar('flops/val', flops, epoch)
                    logging.info("Epoch %d: flops %.3f" % (epoch, flops))
                    valid_fid_history.append(valid_fid)
                    flops_history.append(flops)
                    if update_arch:
                        flops_supernet_history.append(architect.flops_supernet)
                if valid_fid < best_fid:
                    best_fid = valid_fid
                    best_epoch = epoch
                logging.info("Best fid:%.3f, Best epoch:%d" % (best_fid, best_epoch))

            if update_arch:
                state = {}
                state['alpha'] = getattr(model.module, 'alpha')
                state['beta'] = getattr(model.module, 'beta')
                state['ratio'] = getattr(model.module, 'ratio')
                state['beta_sh'] = getattr(model.module, 'beta_sh')
                state['ratio_sh'] = getattr(model.module, 'ratio_sh')
                state["fid"] = valid_fid
                state["flops"] = flops
                torch.save(state, os.path.join(config.save, "arch_%d_%f.pt" % (epoch, flops)))

                # keep the searched architecture inside the target FLOPs range
                if config.flops_weight > 0:
                    if flops < config.flops_min:
                        architect.flops_weight /= 2
                    elif flops > config.flops_max:
                        architect.flops_weight *= 2
                    logger.add_scalar("arch/flops_weight", architect.flops_weight, epoch + 1)
                    logging.info("arch_flops_weight = " + str(architect.flops_weight))

    save(model, os.path.join(config.save, 'weights.pt'))
    if update_arch:
        torch.save(state, os.path.join(config.save, "arch.pt"))
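# A small, self-contained restatement of the partial checkpoint loading pattern
# used in main(pretrain=...) above: keep only keys that exist in the current
# model and whose tensor shapes match. The helper name is ours; the logic
# mirrors the pretrained_dict filtering in the function above.
import torch

def load_matching_weights(model, weights_path):
    partial = torch.load(weights_path, map_location='cpu')
    state = model.state_dict()
    matched = {k: v for k, v in partial.items()
               if k in state and state[k].size() == v.size()}
    state.update(matched)
    model.load_state_dict(state)
    return len(matched), len(state)   # how many tensors were actually restored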