def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    utils.load(model, args.model_path)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    _, test_transform = utils.data_transforms_cifar10(args)
    test_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=test_transform)
    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
def parse_rev_args(receive_msg):
    """ Parse received messages into global variables. """
    global trainloader
    global testloader
    global net
    global criterion
    global optimizer

    # Loading data
    logger.debug("Preparing data..")
    transform_train, transform_test = utils.data_transforms_cifar10(args)
    trainset = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batch_size, shuffle=True, num_workers=2)
    testset = torchvision.datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=args.batch_size, shuffle=False, num_workers=2)

    # Model
    logger.debug("Building model..")
    net = build_graph_from_json(receive_msg)
    net = net.to(device)
    criterion = nn.CrossEntropyLoss()
    if device == "cuda" and torch.cuda.device_count() > 1:
        net = torch.nn.DataParallel(net)

    if args.optimizer == "SGD":
        optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9, weight_decay=5e-4)
    elif args.optimizer == "Adadelta":
        optimizer = optim.Adadelta(net.parameters(), lr=args.learning_rate)
    elif args.optimizer == "Adagrad":
        optimizer = optim.Adagrad(net.parameters(), lr=args.learning_rate)
    elif args.optimizer == "Adam":
        optimizer = optim.Adam(net.parameters(), lr=args.learning_rate)
    elif args.optimizer == "Adamax":
        optimizer = optim.Adamax(net.parameters(), lr=args.learning_rate)
    elif args.optimizer == "RMSprop":
        optimizer = optim.RMSprop(net.parameters(), lr=args.learning_rate)
    return 0
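# The optimizer selection above uses a chain of independent if-statements. A
# hedged alternative sketch (not part of the original code): a dict-based
# factory that fails loudly on unknown names. `build_optimizer` is an
# illustrative helper name, not an existing function in this repository.
import torch.optim as optim

def build_optimizer(name, params, lr):
    """Map an optimizer name to a torch.optim constructor (illustrative sketch)."""
    factories = {
        "SGD": lambda: optim.SGD(params, lr=lr, momentum=0.9, weight_decay=5e-4),
        "Adadelta": lambda: optim.Adadelta(params, lr=lr),
        "Adagrad": lambda: optim.Adagrad(params, lr=lr),
        "Adam": lambda: optim.Adam(params, lr=lr),
        "Adamax": lambda: optim.Adamax(params, lr=lr),
        "RMSprop": lambda: optim.RMSprop(params, lr=lr),
    }
    if name not in factories:
        raise ValueError("unsupported optimizer: %s" % name)
    return factories[name]()

# Usage sketch: optimizer = build_optimizer(args.optimizer, net.parameters(), args.learning_rate)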
def build_validation_data_loader(self) -> DataLoader:
    _, valid_transform = data_transforms_cifar10()
    valid_data = dset.CIFAR10(root=self.data_dir, train=False, download=True, transform=valid_transform)
    valid_queue = DataLoader(
        valid_data,
        batch_size=self.context.get_per_slot_batch_size(),
        shuffle=False,
        num_workers=2,
    )
    return valid_queue
def infer(model):
    test_data = dset.CIFAR10(
        root=TestConfig['data_path'],
        train=False,
        download=True,
        transform=data_transforms_cifar10(0, False),
    )
    if DEBUG:
        # evaluate on a small random subset while debugging
        sampler = torch.utils.data.sampler.SubsetRandomSampler(list(range(256)))
        test_queue = torch.utils.data.DataLoader(
            test_data,
            sampler=sampler,
            batch_size=TestConfig['batch_size'],
            shuffle=False,
            pin_memory=True,
            num_workers=16,
        )
    else:
        test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=TestConfig['batch_size'],
            shuffle=False,
            pin_memory=True,
            num_workers=16,
        )

    model.eval().cuda()
    acc_avg = AvgrageMeter('acc')
    for step, (X, y) in enumerate(test_queue):
        X = Variable(X, requires_grad=False).cuda()
        y = Variable(y, requires_grad=False).cuda(non_blocking=True)
        logits, _ = model(X, TestConfig['drop_path_prob'])
        pred = torch.argmax(logits, dim=1)
        # divide by the actual batch size so the last (possibly smaller) batch is counted correctly
        acc = torch.sum(pred == y).float() / y.size(0)
        acc_avg.update(acc)
        if step % TestConfig['log_freq'] == 0:  # was 'is 0', which compares identity rather than value
            print(f"test batch {step}: {acc_avg}")
    print(f"Final test: {acc_avg}")
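# AvgrageMeter is used by infer() above but not defined in this snippet. A
# minimal stand-in sketch, assuming it keeps a running average and renders as
# "<name>: <avg>"; the real implementation in the original utils module may differ.
class AvgrageMeter:
    def __init__(self, name=''):
        self.name = name
        self.sum = 0.0
        self.cnt = 0

    def update(self, val, n=1):
        # accumulate a (possibly batch-averaged) value, weighted by n samples
        self.sum += float(val) * n
        self.cnt += n

    @property
    def avg(self):
        return self.sum / max(self.cnt, 1)

    def __str__(self):
        return "{}: {:.4f}".format(self.name, self.avg)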
def build_training_data_loader(self) -> DataLoader:
    """
    For bi-level NAS, each instance from the dataloader needs one image for
    training the shared weights and another for updating the architecture
    parameters.
    """
    train_transform, _ = data_transforms_cifar10()
    train_data = dset.CIFAR10(root=self.data_dir, train=True, download=True, transform=train_transform)

    bilevel_data = BilevelDataset(train_data)
    self.train_data = bilevel_data

    train_queue = DataLoader(
        bilevel_data,
        batch_size=self.context.get_per_slot_batch_size(),
        shuffle=True,
        num_workers=2,
    )
    return train_queue
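# BilevelDataset is not shown in this snippet. A minimal sketch of what it is
# assumed to do: split the training set in half and pair each weight-update
# example with an example reserved for the architecture update. The pairing
# scheme and field names here are illustrative, not the original implementation.
import numpy as np
from torch.utils.data import Dataset

class BilevelDataset(Dataset):
    def __init__(self, base_dataset):
        self.base = base_dataset
        half = len(base_dataset) // 2
        perm = np.random.permutation(len(base_dataset))
        self.weight_idx = perm[:half]        # examples for shared-weight updates
        self.arch_idx = perm[half:2 * half]  # examples for architecture updates

    def __len__(self):
        return len(self.weight_idx)

    def __getitem__(self, i):
        x_w, y_w = self.base[self.weight_idx[i]]
        x_a, y_a = self.base[self.arch_idx[i]]
        return x_w, y_w, x_a, y_a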
def get_data_iters(params):
    """
    Create the train and validation data iterators by splitting the CIFAR-10
    training set.
    :param params: dict with at least "cutout" and "batch_size"
    :return: (loader_train, loader_valid)
    """
    # Loading data
    train_transform, valid_transform = data_transforms_cifar10(
        cutout=params["cutout"], cutout_length=16)
    dataset_train = datasets.CIFAR10("./datasets", train=True, download=True, transform=train_transform)
    dataset_valid = datasets.CIFAR10("./datasets", train=True, download=True, transform=valid_transform)

    # 80/20 split of the 50,000 CIFAR-10 training images: 40,000 for training, 10,000 for validation
    dataset_valid = Subset(dataset_valid, range(4 * len(dataset_train) // 5, len(dataset_train)))
    dataset_train = Subset(dataset_train, range(4 * len(dataset_train) // 5))

    # building cyclic iterators over the training and validation sets
    loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=params["batch_size"], shuffle=True)
    loader_valid = torch.utils.data.DataLoader(
        dataset_valid, batch_size=params["batch_size"], shuffle=True)

    print("Length of datasets: Train: {}, Valid: {}".format(len(dataset_train), len(dataset_valid)))
    print("Length of loaders: Train: {}, Valid: {}".format(len(loader_train), len(loader_valid)))
    return loader_train, loader_valid
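# The loaders above are typically consumed as "cyclic" iterators during a
# bi-level search: each step draws one batch from the train loader and one from
# the valid loader, restarting a loader once it is exhausted. A hedged usage
# sketch; `cycle_loader` is an illustrative helper, not part of the original code.
def cycle_loader(loader):
    """Yield batches forever, restarting (and reshuffling) the DataLoader when exhausted."""
    while True:
        for batch in loader:
            yield batch

# train_iter = cycle_loader(loader_train)
# valid_iter = cycle_loader(loader_valid)
# x_train, y_train = next(train_iter)   # batch for the weight update
# x_valid, y_valid = next(valid_iter)   # batch for the architecture update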
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.to('cuda')
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to('cuda')

    # do not apply weight decay to BN layers
    if args.bn_no_decay:
        logging.info('BN layers are excluded from weight decay')
        bn_params, other_params = utils.split_bn_params(model)
        logging.debug('bn: %s', [p.dtype for p in bn_params])
        logging.debug('other: %s', [p.dtype for p in other_params])
        param_group = [{'params': bn_params, 'weight_decay': 0}, {'params': other_params}]
    else:
        param_group = model.parameters()

    optimizer = torch.optim.SGD(
        param_group,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )
    logging.info('optimizer: %s', optimizer)

    train_transform, valid_transform = utils.data_transforms_cifar10(args)
    train_data = datasets.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = datasets.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.num_workers)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.num_workers)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    init_epoch = 0
    best_acc = 0
    if args.recover:
        states = torch.load(args.recover)
        model.load_state_dict(states['state'])  # checkpoint key was misspelled 'stete'; fixed here and in save_checkpoint below
        init_epoch = states['epoch'] + 1
        best_acc = states['best_acc']
        logging.info('checkpoint loaded')

    scheduler.step(init_epoch)
    logging.info('scheduler is set to epoch %d. learning rate is %s', init_epoch, scheduler.get_lr())

    for epoch in range(init_epoch, args.epochs):
        logging.info('epoch %d lr %s', epoch, scheduler.get_lr())
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %.4f', train_acc)

        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

        logging.info('epoch %03d overall train_acc=%.4f valid_acc=%.4f', epoch, train_acc, valid_acc)
        scheduler.step()

        # gpu info
        utils.gpu_usage(args.debug)

        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('best acc: %.4f', best_acc)

        utils.save_checkpoint(
            state={'state': model.state_dict(), 'epoch': epoch, 'best_acc': best_acc},
            is_best=False,
            save=args.save)
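# utils.split_bn_params is referenced above (to exclude BatchNorm parameters
# from weight decay) but not shown. A minimal sketch under the assumption that
# it classifies parameters by their owning module; the real helper may differ,
# and `split_bn_params_sketch` is an illustrative name.
import torch.nn as nn

def split_bn_params_sketch(model):
    """Return (bn_params, other_params); illustrative, not the original utils helper."""
    bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
    bn_params, other_params = [], []
    for module in model.modules():
        target = bn_params if isinstance(module, bn_types) else other_params
        # recurse=False so each parameter is assigned exactly once
        target.extend(module.parameters(recurse=False))
    return bn_params, other_params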
def estimate(esargs):
    global best_acc
    global trainloader
    global testloader
    global net
    global criterion
    global optimizer
    global rank

    # reset the early-stopping object
    early_stop = utils.EarlyStopping(mode="max")
    best_acc = 0

    lr_explore = esargs['learning_rate']
    bs_explore = int(esargs['batch_size'])

    transform_train, transform_test = utils.data_transforms_cifar10(args)
    trainset = torchvision.datasets.CIFAR10(
        root="/root/mountdir/data/", train=True, download=True, transform=transform_train)
    trainsampler = DistributedSampler(trainset)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=bs_explore, shuffle=False, num_workers=args.workers,
        pin_memory=False, sampler=trainsampler)

    op = optim.SGD(net.parameters(), lr=lr_explore, momentum=0.9, weight_decay=5e-4)

    for ep in range(args.epochs):
        current_ep = ep + 1
        if rank == 0:
            # record the current epoch in the trial's output.log (replace the line if it exists, append otherwise)
            log_path = experiment_path + "/trials/" + str(nni.get_trial_id()) + "/output.log"
            if os.popen("grep epoch " + log_path).read():
                os.system("sed -i '/^epoch/cepoch=" + str(ep + 1) + "' " + log_path)
            else:
                os.system("sed -i '$a\\epoch=" + str(ep + 1) + "' " + log_path)
        try:
            train_acc = train(ep, op)
        except Exception as exception:
            with open('/root/log', 'a+') as f11:
                f11.write('###### training error \n')
                f11.write(str(exception) + "\n")
            acclist.append(0)
            return 0, current_ep
        test_acc, best_acc = test(ep)
        logger.debug(test_acc)
        if early_stop.step(test_acc):
            break

    reslist.append([best_acc, bs_explore, str(lr_explore)[0:7]])
    acclist.append(best_acc)
    return best_acc, current_ep
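# utils.EarlyStopping is used by estimate() above but not defined here. A
# minimal stand-in sketch, assuming step() returns True once the monitored
# metric has stopped improving for `patience` evaluations; the constructor
# arguments beyond mode="max" are assumptions.
class EarlyStopping:
    def __init__(self, mode="max", patience=10, min_delta=0.0):
        assert mode in ("max", "min")
        self.mode = mode
        self.patience = patience
        self.min_delta = min_delta
        self.best = None
        self.bad_steps = 0

    def step(self, metric):
        improved = (
            self.best is None
            or (self.mode == "max" and metric > self.best + self.min_delta)
            or (self.mode == "min" and metric < self.best - self.min_delta)
        )
        if improved:
            self.best = metric
            self.bad_steps = 0
        else:
            self.bad_steps += 1
        return self.bad_steps > self.patience  # True -> stop training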
def parse_rev_args(receive_msg):
    """ Parse received messages into global variables. """
    global trainloader
    global testloader
    global trainsampler
    global testsampler
    global net
    global criterion
    global optimizer
    global rank, world_size

    # Loading data
    if rank == 0:
        logger.debug("Preparing data..")
    transform_train, transform_test = utils.data_transforms_cifar10(args)
    dataPath = os.environ["HOME"] + "/mountdir/data/"
    trainset = torchvision.datasets.CIFAR10(
        root=dataPath, train=True, download=True, transform=transform_train)
    # trainsampler = DistributedSampler(trainset)
    # trainloader = torch.utils.data.DataLoader(
    #     trainset, batch_size=args.batch_size_per_gpu, shuffle=False, num_workers=args.workers,
    #     pin_memory=False, sampler=trainsampler
    # )
    testset = torchvision.datasets.CIFAR10(
        root=dataPath, train=False, download=True, transform=transform_test)
    testsampler = DistributedSampler(testset)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=args.batch_size, shuffle=False, num_workers=0,
        pin_memory=False, sampler=testsampler)
    if rank == 0:
        print("len(trainset)=" + str(len(trainset)))
        print("len(testset)=" + str(len(testset)))

    # Model
    if rank == 0:
        logger.debug("Building model..")
    net = build_graph_from_json(receive_msg)
    net = net.to(device)
    net = DistModule(net)
    criterion = nn.CrossEntropyLoss()
    # single-process optimizer selection kept for reference but disabled here;
    # in this distributed variant the optimizer is built in estimate()
    # if args.optimizer == "SGD":
    #     optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9, weight_decay=5e-4)
    # if args.optimizer == "Adadelta":
    #     optimizer = optim.Adadelta(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "Adagrad":
    #     optimizer = optim.Adagrad(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "Adam":
    #     optimizer = optim.Adam(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "Adamax":
    #     optimizer = optim.Adamax(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "RMSprop":
    #     optimizer = optim.RMSprop(net.parameters(), lr=args.learning_rate)
    cudnn.benchmark = True
    return 0
if not os.path.exists(ModelConfig['alphas_path']):
    logger.warning("can't find alpha in the specified path")
    sys.exit(1)
alpha_normal, alpha_reduce = load_alphas(ModelConfig['alphas_path'])
snas_cell = genotype(alpha_normal, alpha_reduce, ModelConfig['steps'], ModelConfig['multiplier'])
auxiliary = TrainerConfig['aux_weight'] is not None
model = Network(ModelConfig['init_channels'], ModelConfig['classes'], ModelConfig['layers'], auxiliary, snas_cell)

train_data = dset.CIFAR10(
    root=DataConfig['data_path'],
    train=True,
    download=True,
    transform=data_transforms_cifar10(DataConfig['cutout_length'], True),
)
if DEBUG:
    # train on a small random subset while debugging
    sampler = torch.utils.data.sampler.SubsetRandomSampler(list(range(256)))
    train_queue = torch.utils.data.DataLoader(
        train_data,
        sampler=sampler,
        batch_size=DataConfig['batch_size'],
        shuffle=False,
        pin_memory=True,
        num_workers=16,
    )
else:
    # full training set when not debugging (body mirrors the DEBUG branch above
    # without the subset sampler; shuffle=True is an assumption for training data)
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=DataConfig['batch_size'],
        shuffle=True,
        pin_memory=True,
        num_workers=16,
    )
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)

    gpu_logger = GpuLogThread([args.gpu], writer, seconds=15 if not args.test else 1)
    gpu_logger.start()
    logging.debug(locals())
    model = None

    # prepare dataset
    if args.cifar100:
        train_transform, _ = utils.data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    else:
        train_transform, _ = utils.data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_iterator = utils.DynamicBatchSizeLoader(
        torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_multiples,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
            pin_memory=True, num_workers=args.workers),
        args.batch_size_min)
    valid_iterator = utils.DynamicBatchSizeLoader(
        torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_multiples,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
            pin_memory=True, num_workers=args.workers),
        args.batch_size_min)

    # build Network
    logging.debug('building network')
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    num_graph_edges = sum(list(range(2, 2 + args.blocks)))
    switches_normal = SwitchManager(num_graph_edges, copy.deepcopy(PRIMITIVES), 'normal')
    switches_reduce = SwitchManager(num_graph_edges, copy.deepcopy(PRIMITIVES), 'reduce')
    total_epochs = 0

    for cycle in parse_cycles():
        logging.debug('new cycle %s' % repr(cycle))
        print('\n' * 3, '-' * 100)
        print(cycle)
        print('', '-' * 100, '\n')
        writer.add_scalar('cycle/net_layers', cycle.net_layers, cycle.num)
        writer.add_scalar('cycle/net_init_c', cycle.net_init_c, cycle.num)
        writer.add_scalar('cycle/net_dropout', cycle.net_dropout, cycle.num)
        writer.add_scalar('cycle/ops_keep', cycle.ops_keep, cycle.num)
        writer.add_scalar('cycle/epochs', cycle.epochs, cycle.num)
        writer.add_scalar('cycle/grace_epochs', cycle.grace_epochs, cycle.num)
        writer.add_scalar('cycle/morphs', cycle.morphs, cycle.num)
        switches_normal.plot_ops(logging.info, writer, cycle.num)
        switches_reduce.plot_ops(logging.info, writer, cycle.num)

        # rebuild the model in each cycle, clean up the cache...
        logging.debug('building model')
        del model
        torch.cuda.empty_cache()
        model = Network(cycle.net_init_c, 100 if args.cifar100 else 10, cycle.net_layers, criterion,
                        switches_normal=switches_normal, switches_reduce=switches_reduce,
                        steps=args.blocks, p=cycle.net_dropout)
        gpu_logger.reset_recent()
        if cycle.load:
            utils.load(model, model_path)
        if args.reset_alphas:
            model.reset_alphas()
        if args.test:
            model.randomize_alphas()
        if cycle.init_morphed:
            model.init_morphed(switches_normal, switches_reduce)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        logging.debug('building optimizers')
        optimizer = torch.optim.SGD(model.net_parameters, args.learning_rate,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(model.arch_parameters, lr=args.arch_learning_rate,
                                       betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)

        logging.debug('building scheduler')
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, int(cycle.epochs), eta_min=args.learning_rate_min)

        if args.batch_size_max > args.batch_size_min:
            train_iterator.set_batch_size(args.batch_size_min)
            valid_iterator.set_batch_size(args.batch_size_min)

        sm_dim = -1
        scale_factor = 0.2

        for epoch in range(cycle.epochs):
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()

            # training
            if epoch < cycle.grace_epochs:
                model.update_p(cycle.net_dropout * (cycle.epochs - epoch - 1) / cycle.epochs)
            else:
                model.update_p(cycle.net_dropout * np.exp(-(epoch - cycle.grace_epochs) * scale_factor))
            train_acc, train_obj = train(train_iterator, valid_iterator, model, criterion,
                                         optimizer, optimizer_a, gpu_logger,
                                         train_arch=epoch >= cycle.grace_epochs)
            epoch_duration = time.time() - epoch_start

            # log info
            logging.info('Train_acc %f', train_acc)
            logging.info('Epoch time: %ds', epoch_duration)
            writer.add_scalar('train/accuracy', train_acc, total_epochs)
            writer.add_scalar('train/loss', train_obj, total_epochs)
            writer.add_scalar('epoch/lr', lr, total_epochs)
            writer.add_scalar('epoch/seconds', epoch_duration, total_epochs)
            writer.add_scalar('epoch/model.p', model.p, total_epochs)
            writer.add_scalar('epoch/batch_size', train_iterator.batch_size, total_epochs)

            # validation, only for the last 5 epochs in a cycle
            if cycle.epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_iterator, model, criterion)
                logging.info('Valid_acc %f', valid_acc)
                writer.add_scalar('valid/accuracy', valid_acc, total_epochs)
                writer.add_scalar('valid/loss', valid_obj, total_epochs)

            total_epochs += 1
            gpu_logger.reset_recent()
            scheduler.step()

        utils.save(model, model_path)
        print('\n' * 2, '------Dropping/morphing paths------')

        # Save switches info for s-c refinement.
        if cycle.is_last:
            switches_normal_copy = switches_normal.copy()
            switches_reduce_copy = switches_reduce.copy()

        # drop operations with low architecture weights, add morphed ones
        arch_param = model.arch_parameters
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        switches_normal.drop_and_morph(normal_prob, cycle.ops_keep, writer, cycle.num,
                                       num_morphs=cycle.morphs,
                                       no_zero=cycle.is_last and args.restrict_zero,
                                       keep_morphable=not cycle.is_last)
        reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
        switches_reduce.drop_and_morph(reduce_prob, cycle.ops_keep, writer, cycle.num,
                                       num_morphs=cycle.morphs,
                                       no_zero=cycle.is_last and args.restrict_zero,
                                       keep_morphable=not cycle.is_last)
        logging.info('switches_normal = \n%s', switches_normal)
        logging.info('switches_reduce = \n%s', switches_reduce)

        # end last cycle with shortcut/zero pruning and save the genotype
        if cycle.is_last:
            arch_param = model.arch_parameters
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for _ in range(num_graph_edges)]
            reduce_final = [0 for _ in range(num_graph_edges)]

            # Generate Architecture
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            for i in range(num_graph_edges):
                if i not in keep_normal:
                    for j in range(len(switches_normal.current_ops)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(switches_reduce.current_ops)):
                        switches_reduce[i][j] = False

            switches_normal.keep_2_branches(normal_prob)
            switches_reduce.keep_2_branches(reduce_prob)
            switches_normal.plot_ops(logging.info, writer, cycle.num + 1)
            switches_reduce.plot_ops(logging.info, writer, cycle.num + 1)
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)
            save_genotype(args.save + 'genotype.json', genotype)

    gpu_logger.stop()
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.enabled = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    logging.info(genotype)
    dataset = params.datasets[args.dset_name]
    network_params = {
        'C': args.init_channels,
        'num_classes': dataset.num_classes,
        'layers': args.layers,
        'num_reductions': args.num_reductions,
        'reduction_location_mode': args.reduction_location_mode,
        'genotype': genotype,
        'stem_multiplier': dataset.num_channels,
        'do_SE': args.do_SE
    }
    model = Network(**network_params)

    logging.info("Loading model parameters from %s", args.model_path)
    utils.load(model, args.model_path)

    flops, num_params = None, None
    if args.calc_flops:
        from thop import profile, clever_format
        input = torch.randn(1, dataset.num_channels, dataset.hw[0], dataset.hw[1])
        flops, num_params = profile(model, inputs=(input,))
        flops, num_params = clever_format([flops, num_params], "%.2f")

    model = model.cuda()

    test_transform = data_transforms_cifar10()
    test_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=test_transform)
    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=0)

    with torch.no_grad():
        test_acc, infer_time = infer(test_queue, model, args.report_freq)

    if args.calc_flops:
        logging.info(
            'Test Accuracy: %.2f%% | Number of parameters: %s | Inference time: %2.2fms | Flops: %s',
            test_acc, num_params, infer_time * 1000, flops)
    else:
        logging.info('Test Accuracy: %.2f%% | Inference time: %2.2fms', test_acc, infer_time * 1000)
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    gpu_logger = GpuLogThread(list(range(num_gpus)), writer, seconds=10 if args.test else 300)
    gpu_logger.start()

    genotype = genotypes.load_genotype(args.arch, skip_cons=args.arch_pref_sc)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')

    model = Network(args.init_channels, 100 if args.cifar100 else 10, args.layers, args.auxiliary, genotype)
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.cuda()
    logging.info("param count = %d", utils.count_parameters(model))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if args.cifar100:
        train_transform, valid_transform = utils.data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_transform, valid_transform = utils.data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)

    train_iterator = utils.DynamicBatchSizeLoader(
        torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_multiples, shuffle=True,
            pin_memory=True, num_workers=args.workers),
        args.batch_size_min)
    test_iterator = utils.DynamicBatchSizeLoader(
        torch.utils.data.DataLoader(
            valid_data, batch_size=args.batch_multiples, shuffle=False,
            pin_memory=True, num_workers=args.workers),
        args.batch_size_min)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))
    best_acc = 0.0

    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        drop_path_prob = args.drop_path_prob * epoch / args.epochs
        logging.info('Epoch: %d lr %e', epoch, lr)
        if num_gpus > 1:
            model.module.drop_path_prob = drop_path_prob
        else:
            model.drop_path_prob = drop_path_prob

        epoch_start_time = time.time()
        train_acc, train_obj = train(train_iterator, test_iterator, model, criterion, optimizer, gpu_logger)
        logging.info('Train_acc: %f', train_acc)

        test_acc, test_obj = infer(test_iterator, model, criterion)
        if test_acc > best_acc:
            best_acc = test_acc
        logging.info('Valid_acc: %f', test_acc)

        epoch_duration = time.time() - epoch_start_time
        utils.save(model, os.path.join(args.save, 'weights.pt'))

        # log info
        print('Epoch time: %ds.' % epoch_duration)
        writer.add_scalar('epoch/lr', lr, epoch)
        writer.add_scalar('epoch/drop_path_prob', drop_path_prob, epoch)
        writer.add_scalar('epoch/seconds', epoch_duration, epoch)
        writer.add_scalar('epoch/batch_size', train_iterator.batch_size, epoch)
        writer.add_scalar('train/accuracy', train_acc, epoch)
        writer.add_scalar('train/loss', train_obj, epoch)
        writer.add_scalar('test/accuracy', test_acc, epoch)
        writer.add_scalar('test/loss', test_obj, epoch)
        scheduler.step()

    gpu_logger.stop()
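# utils.DynamicBatchSizeLoader wraps the DataLoaders in the two search/train
# scripts above but is not shown. A minimal sketch, assuming it concatenates
# several fixed-size "multiples" into one effective batch whose size can be
# changed at runtime via set_batch_size(); the interface details are assumptions.
import torch

class DynamicBatchSizeLoader:
    def __init__(self, loader, batch_size):
        self.loader = loader          # underlying DataLoader with batch_size=args.batch_multiples
        self.batch_size = batch_size  # current effective batch size

    def set_batch_size(self, batch_size):
        self.batch_size = batch_size

    def __iter__(self):
        xs, ys, n = [], [], 0
        for x, y in self.loader:
            xs.append(x)
            ys.append(y)
            n += x.size(0)
            if n >= self.batch_size:
                yield torch.cat(xs), torch.cat(ys)
                xs, ys, n = [], [], 0
        if xs:
            # flush the remainder so no samples are dropped
            yield torch.cat(xs), torch.cat(ys)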