def main(): if not torch.cuda.is_available(): logging.info('No GPU found!') sys.exit(1) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) cudnn.enabled = True cudnn.benchmark = False cudnn.deterministic = True args.steps = int(np.ceil(50000 / args.batch_size)) * args.epochs logging.info("Args = %s", args) _, model_state_dict, epoch, step, optimizer_state_dict, best_acc_top1 = utils.load(args.output_dir) build_fn = get_builder(args.dataset) train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler = build_fn(model_state_dict, optimizer_state_dict, epoch=epoch-1) while epoch < args.epochs: scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) train_acc, train_obj, step = train(train_queue, model, optimizer, step, train_criterion) logging.info('train_acc %f', train_acc) valid_acc_top1, valid_obj = valid(valid_queue, model, eval_criterion) logging.info('valid_acc %f', valid_acc_top1) epoch += 1 is_best = False if valid_acc_top1 > best_acc_top1: best_acc_top1 = valid_acc_top1 is_best = True utils.save(args.output_dir, args, model, epoch, step, optimizer, best_acc_top1, is_best)
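# The resume path above goes through repo-specific utils.load / utils.save helpers.
# A minimal, self-contained sketch of the same checkpoint round-trip using plain
# torch.save / torch.load (the file name and dict keys here are assumptions for
# illustration, not the repo's actual format):
import os
import torch

def save_checkpoint(output_dir, model, optimizer, epoch, step, best_acc_top1):
    # Persist everything needed to resume from the next epoch.
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
        'step': step,
        'best_acc_top1': best_acc_top1,
    }, os.path.join(output_dir, 'checkpoint.pt'))

def load_checkpoint(output_dir):
    # Return sensible defaults when training starts from scratch.
    path = os.path.join(output_dir, 'checkpoint.pt')
    if not os.path.exists(path):
        return None, 0, 0, None, 0.0
    ckpt = torch.load(path, map_location='cpu')
    return (ckpt['model_state_dict'], ckpt['epoch'], ckpt['step'],
            ckpt['optimizer_state_dict'], ckpt['best_acc_top1'])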
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) if args.gpu == -1: device = torch.device('cpu') else: device = torch.device('cuda:{}'.format(args.gpu)) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype = eval("genotypes.%s" % args.arch) model = Network(args) model = model.to(device) logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.to(device) optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay ) train_data = MyDataset(args=args, subset='train') valid_data = MyDataset(args=args, subset='valid') train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs)) for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj, train_fscores, train_MIoU = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f _fscores %f _MIoU %f', train_acc, train_fscores, train_MIoU) valid_acc, valid_obj, valid_fscores, valid_MIoU = infer(valid_queue, model, criterion) logging.info('valid_acc %f _fscores %f _MIoU %f', valid_acc, valid_fscores, valid_MIoU) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype_path = os.path.join(utils.get_dir(), args.genotype_path, 'genotype.txt') if os.path.isfile(genotype_path): with open(genotype_path, "r") as f: geno_raw = f.read() genotype = eval(geno_raw) else: genotype = eval("genotypes.%s" % args.arch) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) datapath = os.path.join(utils.get_dir(), args.data) if args.task == "CIFAR100cf": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = utils.CIFAR100C2F(root=datapath, train=True, download=True, transform=train_transform) valid_data = utils.CIFAR100C2F(root=datapath, train=False, download=True, transform=valid_transform) train_indices = train_data.filter_by_fine(args.eval_filter) valid_indices = valid_data.filter_by_fine(args.eval_filter) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( train_indices), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(torch.utils.data.Subset( valid_data, valid_indices), batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter else: if args.task == "CIFAR100": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = dset.CIFAR100(root=datapath, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR100(root=datapath, train=False, download=True, transform=valid_transform) else: train_transform, valid_transform = utils._data_transforms_cifar10( args) train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=datapath, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, args.epochs) for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) if args.test: torch.cuda.empty_cache() os.system( 'python src/test.py --batch_size 8 --auxiliary --model_path %s --task %s --test_filter %s' % (os.path.join(args.save, 'weights.pt'), args.task, args.test_filter))
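# The genotype handling above writes str(genotype) to a text file and later rebuilds
# it with eval(). A condensed sketch of that round-trip, assuming the DARTS-style
# Genotype namedtuple (illustrative, not the repo's genotypes module):
from collections import namedtuple

Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

def save_genotype(genotype, path):
    with open(path, 'w') as f:
        f.write(str(genotype))

def load_genotype(path):
    with open(path, 'r') as f:
        # str(genotype) prints the constructor call, e.g. "Genotype(normal=[...], ...)",
        # so eval() only needs Genotype in scope to rebuild it.
        return eval(f.read(), {'Genotype': Genotype})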
def main(): # Select the search space to search in if args.search_space == '1': search_space = SearchSpace1() elif args.search_space == '2': search_space = SearchSpace2() elif args.search_space == '3': search_space = SearchSpace3() else: raise ValueError('Unknown search space') if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, output_weights=args.output_weights, steps=search_space.num_intermediate_nodes, search_space=search_space) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) nasbench = None for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # Save the one shot model architecture weights for later analysis arch_filename = os.path.join( args.save, 'one_shot_architecture_{}.obj'.format(epoch)) with open(arch_filename, 'wb') as filehandler: numpy_tensor_list = [] for tensor in model.arch_parameters(): numpy_tensor_list.append(tensor.detach().cpu().numpy()) pickle.dump(numpy_tensor_list, filehandler) # Save the entire one-shot-model filepath = os.path.join(args.save, 'one_shot_model_{}.obj'.format(epoch)) torch.save(model.state_dict(), filepath) logging.info('architecture') logging.info(numpy_tensor_list) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) logging.info('STARTING EVALUATION') if nasbench is None: nasbench = NasbenchWrapper( dataset_file='/nasbench_data/nasbench_only108.tfrecord') test, valid, runtime, params = naseval.eval_one_shot_model( config=args.__dict__, model=arch_filename, nasbench_results=nasbench) index = np.random.choice(list(range(3))) logging.info( 'TEST ERROR: %.3f | VALID ERROR: %.3f | RUNTIME: %f | PARAMS: %d' % (test[index], valid[index], runtime[index], params[index])) if args.s3_bucket is not None: for root, dirs, files in os.walk(args.save): for f in files: if 'one_shot_model' not in f: path = os.path.join(root, f) upload_to_s3(path, args.s3_bucket, path)
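# The train/validation split above (and in most of the search scripts below) carves
# both loaders out of the CIFAR-10 training set with SubsetRandomSampler. A compact,
# self-contained version of that recurring pattern; batch_size and train_portion
# defaults are illustrative:
import numpy as np
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms

def make_search_queues(data_root, batch_size=64, train_portion=0.5):
    train_data = dset.CIFAR10(root=data_root, train=True, download=True,
                              transform=transforms.ToTensor())
    indices = list(range(len(train_data)))
    split = int(np.floor(train_portion * len(indices)))
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=2)
    return train_queue, valid_queue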
def main(): if is_wandb_used: wandb.init(project="automl-gradient-based-nas", name="r" + str(args.run_id) + "-e" + str(args.epochs) + "-lr" + str(args.learning_rate) + "-l(" + str(args.lambda_train_regularizer) + "," + str(args.lambda_valid_regularizer) + ")", config=args, entity="automl") global is_multi_gpu gpus = [int(i) for i in args.gpu.split(',')] logging.info('gpus = %s' % gpus) if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %s' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() # default: args.init_channels = 16, CIFAR_CLASSES = 10, args.layers = 8 if args.arch_search_method == "DARTS": model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) elif args.arch_search_method == "GDAS": model = Network_GumbelSoftmax(args.init_channels, CIFAR_CLASSES, args.layers, criterion) else: model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) if len(gpus) > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs model = nn.DataParallel(model) is_multi_gpu = True model.cuda() if args.model_path != "saved_models": utils.load(model, args.model_path) arch_parameters = model.module.arch_parameters( ) if is_multi_gpu else model.arch_parameters() arch_params = list(map(id, arch_parameters)) parameters = model.module.parameters( ) if is_multi_gpu else model.parameters() weight_params = filter(lambda p: id(p) not in arch_params, parameters) logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( weight_params, # model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) # will cost time to download the data train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) # split index train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size * len(gpus), sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size * len(gpus), sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, criterion, args) best_accuracy = 0 best_accuracy_different_cnn_counts = dict() if is_wandb_used: table = wandb.Table(columns=["Epoch", "Searched Architecture"]) for epoch in range(args.epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # training train_acc, train_obj, train_loss = train(epoch, train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) if is_wandb_used: wandb.log({"searching_train_acc": train_acc, "epoch": epoch}) wandb.log({"searching_train_loss": train_loss, "epoch": epoch}) # validation with torch.no_grad(): valid_acc, valid_obj, valid_loss = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) scheduler.step() if 
is_wandb_used: wandb.log({"searching_valid_acc": valid_acc, "epoch": epoch}) wandb.log({"searching_valid_loss": valid_loss, "epoch": epoch}) wandb.log({ "search_train_valid_acc_gap": train_acc - valid_acc, "epoch": epoch }) wandb.log({ "search_train_valid_loss_gap": train_loss - valid_loss, "epoch": epoch }) # save the structure genotype, normal_cnn_count, reduce_cnn_count = model.module.genotype( ) if is_multi_gpu else model.genotype() # early stopping if args.early_stopping == 1: if normal_cnn_count == 6 and reduce_cnn_count == 0: break print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count)) print( F.softmax(model.module.alphas_normal if is_multi_gpu else model.alphas_normal, dim=-1)) print( F.softmax(model.module.alphas_reduce if is_multi_gpu else model.alphas_reduce, dim=-1)) logging.info('genotype = %s', genotype) if is_wandb_used: wandb.log({"genotype": str(genotype)}, step=epoch - 1) table.add_data(str(epoch), str(genotype)) wandb.log({"Searched Architecture": table}) # save the cnn architecture according to the CNN count cnn_count = normal_cnn_count * 10 + reduce_cnn_count wandb.log({ "searching_cnn_count(%s)" % cnn_count: valid_acc, "epoch": epoch }) if cnn_count not in best_accuracy_different_cnn_counts.keys(): best_accuracy_different_cnn_counts[cnn_count] = valid_acc summary_key_cnn_structure = "best_acc_for_cnn_structure(n:%d,r:%d)" % ( normal_cnn_count, reduce_cnn_count) wandb.run.summary[summary_key_cnn_structure] = valid_acc summary_key_best_cnn_structure = "epoch_of_best_acc_for_cnn_structure(n:%d,r:%d)" % ( normal_cnn_count, reduce_cnn_count) wandb.run.summary[summary_key_best_cnn_structure] = epoch else: if valid_acc > best_accuracy_different_cnn_counts[cnn_count]: best_accuracy_different_cnn_counts[cnn_count] = valid_acc summary_key_cnn_structure = "best_acc_for_cnn_structure(n:%d,r:%d)" % ( normal_cnn_count, reduce_cnn_count) wandb.run.summary[summary_key_cnn_structure] = valid_acc summary_key_best_cnn_structure = "epoch_of_best_acc_for_cnn_structure(n:%d,r:%d)" % ( normal_cnn_count, reduce_cnn_count) wandb.run.summary[summary_key_best_cnn_structure] = epoch if valid_acc > best_accuracy: best_accuracy = valid_acc wandb.run.summary["best_valid_accuracy"] = valid_acc wandb.run.summary["epoch_of_best_accuracy"] = epoch utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
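# The id()-based filtering above separates the architecture parameters (alphas) from
# the ordinary weights so each group can get its own optimizer, as in DARTS (SGD for
# weights, Adam for alphas). A condensed sketch of that split; it assumes the model
# exposes arch_parameters(), and the Adam hyper-parameters are the values used in the
# DARTS paper:
import torch

def build_optimizers(model, lr=0.025, momentum=0.9, weight_decay=3e-4,
                     arch_lr=3e-4, arch_weight_decay=1e-3):
    arch_parameters = list(model.arch_parameters())
    arch_ids = {id(p) for p in arch_parameters}
    weight_params = [p for p in model.parameters() if id(p) not in arch_ids]
    w_optimizer = torch.optim.SGD(weight_params, lr,
                                  momentum=momentum, weight_decay=weight_decay)
    a_optimizer = torch.optim.Adam(arch_parameters, lr=arch_lr,
                                   betas=(0.5, 0.999), weight_decay=arch_weight_decay)
    return w_optimizer, a_optimizer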
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.rho, args.ewma) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) datapath = os.path.join(utils.get_dir(), args.data) train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, int(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) model.initialize_Z_and_U() loggers = {"train": {"loss": [], "acc": [], "step": []}, "val": {"loss": [], "acc": [], "step": []}, "infer": {"loss": [], "acc": [], "step": []}, "ath": {"threshold": [], "step": []}, "zuth": {"threshold": [], "step": []}, "astep": [], "zustep": []} if args.constant_alpha_threshold < 0: alpha_threshold = args.init_alpha_threshold else: alpha_threshold = args.constant_alpha_threshold zu_threshold = args.init_zu_threshold alpha_counter = 0 ewma = -1 for epoch in range(args.epochs): valid_iter = iter(valid_queue) model.clear_U() scheduler.step() lr = scheduler.get_last_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(torch.clamp(model.alphas_normal, min=0.1, max=1.0)) print(torch.clamp(model.alphas_reduce, min=0.1, max=1.0)) # training train_acc, train_obj, alpha_threshold, zu_threshold, alpha_counter, ewma = train(train_queue, valid_iter, model, architect, criterion, optimizer, lr, loggers, alpha_threshold, zu_threshold, alpha_counter, ewma, args) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock) logging.info('valid_acc %f', valid_acc) utils.plot_loss_acc(loggers, args.save) # model.update_history() utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normalalpha'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reducealpha'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.FI_normal_history, path=os.path.join(args.save, 'normalFI'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.FI_reduce_history, path=os.path.join(args.save, 'reduceFI'), steps=loggers["train"]["step"]) scaled_FI_normal = scale(model.FI_normal_history, model.alphas_normal_history) scaled_FI_reduce = 
scale(model.FI_reduce_history, model.alphas_reduce_history) utils.save_file(recoder=scaled_FI_normal, path=os.path.join(args.save, 'normalFIscaled'), steps=loggers["train"]["step"]) utils.save_file(recoder=scaled_FI_reduce, path=os.path.join(args.save, 'reduceFIscaled'), steps=loggers["train"]["step"]) utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save, "FI", loggers["ath"], loggers['astep']) utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history, args.save, "FI_ewma", loggers["ath"], loggers['astep']) utils.plot_FI(model.FI_alpha_history_step, model.FI_alpha_history, args.save, "FI_alpha", loggers["zuth"], loggers['zustep']) utils.save(model, os.path.join(args.save, 'weights.pt')) genotype = model.genotype() logging.info('genotype = %s', genotype) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close()
writer.add_scalar("valid_acc", valid_acc, epoch + 1) else: test_acc = get_arch_score(api, arch_index, args.dataset, 200, acc_type) valid_acc = get_arch_score(api, arch_index, args.dataset, 200, val_acc_type) writer.add_scalar("test_acc", test_acc, epoch + 1) writer.add_scalar("valid_acc", valid_acc, epoch + 1) tmp = (arch_str, test_acc, valid_acc) best_arch_per_epoch.append(tmp) # Applying Genetic Algorithm pop = ga.evolve(population) population = pop last = time.time() - start_time logging.info("[INFO] {}/{} epoch finished in {} minutes".format(epoch + 1, args.epochs, last / 60)) utils.save(model, os.path.join(DIR, "weights","weights.pt")) #if epoch > 0: # break writer.close() last = time.time() - start logging.info("[INFO] {} hours".format(last / 3600)) logging.info(f'[INFO] Best Architecture after the search: {best_arch_per_epoch[-1]}') logging.info(f'length best_arch_per_epoch: {len(best_arch_per_epoch)}') with open(os.path.join(DIR, "best_architectures.pickle"), 'wb') as f: pickle.dump(best_arch_per_epoch, f)
def main(): utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py')) print(args) seed = random.randint(1, 100000000) print(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True cudnn.enabled = True n_channels = 3 n_bins = 2.**args.n_bits # Define model and loss criteria model = SearchNetwork(n_channels, args.n_flow, args.n_block, n_bins, affine=args.affine, conv_lu=not args.no_lu) model = nn.DataParallel(model, [args.gpu]) model.load_state_dict( torch.load("architecture.pt", map_location="cuda:{}".format(args.gpu))) model = model.module genotype = model.sample_architecture() with open(args.save + '/genotype.pkl', 'wb') as fp: pickle.dump(genotype, fp) model_single = EnsembleNetwork(n_channels, args.n_flow, args.n_block, n_bins, genotype, affine=args.affine, conv_lu=not args.no_lu) model = model_single model = model.to(device) optimizer = torch.optim.Adam(model.parameters(), args.learning_rate) dataset = iter(sample_cifar10(args.batch, args.img_size)) # Sample generated images z_sample = [] z_shapes = calc_z_shapes(n_channels, args.img_size, args.n_flow, args.n_block) for z in z_shapes: z_new = torch.randn(args.n_sample, *z) * args.temp z_sample.append(z_new.to(device)) with tqdm(range(args.iter)) as pbar: for i in pbar: # Training procedure model.train() # Get a random minibatch from the search queue with replacement input, _ = next(dataset) input = Variable(input, requires_grad=False).cuda(non_blocking=True) log_p, logdet, _ = model(input + torch.rand_like(input) / n_bins) logdet = logdet.mean() loss, _, _ = likelihood_loss(log_p, logdet, args.img_size, n_bins) # Optimize model optimizer.zero_grad() loss.backward() optimizer.step() pbar.set_description("Loss: {}".format(loss.item())) # Save generated samples if i % 100 == 0: with torch.no_grad(): tvutils.save_image( model_single.reverse(z_sample).cpu().data, "{}/samples/{}.png".format(args.save, str(i + 1).zfill(6)), normalize=False, nrow=10, ) # Save checkpoint if i % 1000 == 0: utils.save(model, os.path.join(args.save, 'latest_weights.pt'))
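# likelihood_loss above is repo-specific, but the quantity it computes is the standard
# change-of-variables objective for normalizing flows, reported in bits per dimension;
# the uniform noise added to the input (torch.rand_like(input) / n_bins) is the usual
# dequantization step for data quantized to n_bins levels. A minimal sketch of that
# objective (a common formulation, not necessarily the repo's exact one):
import math

def bits_per_dim_loss(log_p, logdet, img_size, n_bins, n_channels=3):
    # log_p and logdet are the per-sample terms returned by the flow's forward pass.
    n_pixel = img_size * img_size * n_channels
    # Discretization correction for inputs quantized to n_bins levels.
    log_likelihood = log_p + logdet - n_pixel * math.log(n_bins)
    # Negative log-likelihood, converted from nats to bits per dimension.
    return (-log_likelihood / (math.log(2) * n_pixel)).mean()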
def main(): # Select the search space to search in if args.search_space == '1': search_space = SearchSpace1() elif args.search_space == '2': search_space = SearchSpace2() elif args.search_space == '3': search_space = SearchSpace3() else: raise ValueError('Unknown search space') if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.num_linear_layers, args.init_channels, CIFAR_CLASSES, args.layers, criterion, output_weights=args.output_weights, steps=search_space.num_intermediate_nodes, search_space=search_space) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) # Read a random sample of architectures archs = pickle.load( open( '/home/siemsj/projects/darts_weight_sharing_analysis/nasbench_analysis/architecture_inductive_bias/sampled_architectures_from_search_space_3.obj', 'rb')) arch = archs[args.arch_idx] arch_parameters = get_weights_from_arch(arch, model) model._arch_parameters = arch_parameters try: for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] # increase the cutout probability linearly throughout search train_transform.transforms[ -1].cutout_prob = args.cutout_prob * epoch / (args.epochs - 1) logging.info('epoch %d lr %e cutout_prob %e', epoch, lr, train_transform.transforms[-1].cutout_prob) # Save the one shot model architecture weights for later analysis arch_filename = os.path.join( args.save, 'one_shot_architecture_{}.obj'.format(epoch)) with open(arch_filename, 'wb') as filehandler: numpy_tensor_list = [] for tensor in model.arch_parameters(): numpy_tensor_list.append(tensor.detach().cpu().numpy()) pickle.dump(numpy_tensor_list, filehandler) # Save the entire one-shot-model # filepath = os.path.join(args.save, 'one_shot_model_{}.obj'.format(epoch)) # torch.save(model.state_dict(), filepath) logging.info('architecture %s', numpy_tensor_list) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) logging.info('STARTING EVALUATION') test, valid, runtime, params = naseval.eval_one_shot_model(
config=args.__dict__, model=arch_filename) index = np.random.choice(list(range(3))) logging.info( 'TEST ERROR: %.3f | VALID ERROR: %.3f | RUNTIME: %f | PARAMS: %d' % (test[index], valid[index], runtime[index], params[index])) except Exception as e: logging.exception('message')
def main(): if not 'debug' in args.save: from nasbench_analysis import eval_darts_one_shot_model_in_nasbench as naseval # Select the search space to search in if args.search_space == '1': search_space = SearchSpace1() elif args.search_space == '2': search_space = SearchSpace2() elif args.search_space == '3': search_space = SearchSpace3() else: raise ValueError('Unknown search space') torch.set_num_threads(3) if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, output_weights=args.output_weights, steps=search_space.num_intermediate_nodes, search_space=search_space) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) if 'debug' in args.save: split = args.batch_size num_train = 2 * args.batch_size train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) analyzer = Analyzer(model, args) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # Save the one shot model architecture weights for later analysis arch_filename = os.path.join( args.save, 'one_shot_architecture_{}.obj'.format(epoch)) with open(arch_filename, 'wb') as filehandler: numpy_tensor_list = [] for tensor in model.arch_parameters(): numpy_tensor_list.append(tensor.detach().cpu().numpy()) pickle.dump(numpy_tensor_list, filehandler) # # Save the entire one-shot-model # filepath = os.path.join(args.save, 'one_shot_model_{}.obj'.format(epoch)) # torch.save(model.state_dict(), filepath) for i in numpy_tensor_list: print(i) # training train_acc, train_obj, ev = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch, analyzer) logging.info('train_acc %f', train_acc) logging.info('eigenvalue %f', ev) writer.add_scalar('Acc/train', train_acc, epoch) writer.add_scalar('Obj/train', train_obj, epoch) writer.add_scalar('Analysis/eigenvalue', ev, epoch) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) writer.add_scalar('Acc/valid', valid_acc, epoch) writer.add_scalar('Obj/valid', valid_obj, epoch) utils.save(model, os.path.join(args.save, 'weights.pt')) if not 'debug' in args.save: # benchmark logging.info('STARTING EVALUATION') test, valid, runtime, params = naseval.eval_one_shot_model( config=args.__dict__, 
model=arch_filename) index = np.random.choice(list(range(3))) test, valid, runtime, params = np.mean(test), np.mean( valid), np.mean(runtime), np.mean(params) logging.info( 'TEST ERROR: %.3f | VALID ERROR: %.3f | RUNTIME: %f | PARAMS: %d' % (test, valid, runtime, params)) writer.add_scalar('Analysis/test', test, epoch) writer.add_scalar('Analysis/valid', valid, epoch) writer.add_scalar('Analysis/runtime', runtime, epoch) writer.add_scalar('Analysis/params', params, epoch) writer.close()
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype_path = os.path.join(utils.get_dir(), args.genotype_path, 'genotype.txt') if os.path.isfile(genotype_path): with open(genotype_path, "r") as f: geno_raw = f.read() genotype = eval(geno_raw) else: genotype = eval("genotypes.%s" % args.arch) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close() model = Network(args.init_channels, 1, args.layers, args.auxiliary, genotype, input_channels=4) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.MSELoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) # train_transform, valid_transform = utils._data_transforms_cifar10(args) # datapath = os.path.join(utils.get_dir(), args.data) # train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) # valid_data = dset.CIFAR10(root=datapath, train=False, download=True, transform=valid_transform) train_data = utils.BathymetryDataset(args, "../mixed_train.csv", to_filter=False) valid_data = utils.BathymetryDataset(args, "../mixed_validation.csv", to_filter=False) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, args.epochs) loggers = { "train": { "loss": [], "step": [] }, "val": { "loss": [], "step": [] } } for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs _ = train(train_queue, model, criterion, optimizer, loggers["train"]) infer_loss = infer(valid_queue, model, criterion) utils.log_loss(loggers["val"], infer_loss, None, 1) utils.plot_loss_acc(loggers, args.save) utils.save(model, os.path.join(args.save, 'weights.pt')) if (epoch + 1) % 50 == 0: utils.save( model, os.path.join(args.save, 'checkpoint' + str(epoch) + 'weights.pt'))
def main(): wandb.init(project="automl-gradient-based-nas", name="GDAS-" + "Opt: " + str(args.optimization) + "Search: " + str(args.arch_search_method), config=args, entity="automl") wandb.config.update(args) # adds all of the arguments as config variables global is_multi_gpu gpus = [int(i) for i in args.gpu.split(',')] logging.info('gpus = %s' % gpus) if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %s' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() # default: args.init_channels = 16, CIFAR_CLASSES = 10, args.layers = 8 if args.arch_search_method == "DARTS": model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) elif args.arch_search_method == "GDAS": model = Network_GumbelSoftmax(args.init_channels, CIFAR_CLASSES, args.layers, criterion) else: raise Exception("search space does not exist!") if len(gpus) > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs model = nn.DataParallel(model) is_multi_gpu = True model.cuda() wandb.watch(model) arch_parameters = model.module.arch_parameters( ) if is_multi_gpu else model.arch_parameters() arch_params = list(map(id, arch_parameters)) parameters = model.module.parameters( ) if is_multi_gpu else model.parameters() weight_params = filter(lambda p: id(p) not in arch_params, parameters) logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( weight_params, # model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) # will cost time to download the data train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) # split index train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, criterion, args) best_accuracy = 0 table = wandb.Table(columns=["Epoch", "Searched Architecture"]) for epoch in range(args.epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.module.genotype() if is_multi_gpu else model.genotype( ) logging.info('genotype = %s', genotype) wandb.log({"genotype": str(genotype)}, step=epoch) table.add_data(str(epoch), str(genotype)) wandb.log({"Searched Architecture": table}) print( F.softmax(model.module.alphas_normal if is_multi_gpu else model.alphas_normal, dim=-1)) print( F.softmax(model.module.alphas_reduce if is_multi_gpu else model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(epoch, train_queue, valid_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) wandb.log({"searching_train_acc": train_acc, "epoch": epoch}) # validation with torch.no_grad(): valid_acc, 
valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) wandb.log({"searching_valid_acc": valid_acc, "epoch": epoch}) scheduler.step() if valid_acc > best_accuracy: wandb.run.summary["best_valid_accuracy"] = valid_acc best_accuracy = valid_acc # utils.save(model, os.path.join(args.save, 'weights.pt')) utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
def main(): np.random.seed(args.seed) torch.manual_seed(args.seed) if args.gpu != -1: if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) torch.cuda.set_device(args.gpu) cudnn.benchmark = True cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) else: logging.info('using cpu') if args.dyno_schedule: args.threshold_divider = np.exp(-np.log(args.threshold_multiplier) * args.schedfreq) print( args.threshold_divider, -np.log(args.threshold_multiplier) / np.log(args.threshold_divider)) if args.dyno_split: args.train_portion = 1 - 1 / (1 + args.schedfreq) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() if args.gpu != -1: criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.rho, args.crb, args.epochs, args.gpu, ewma=args.ewma, reg=args.reg) if args.gpu != -1: model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) datapath = os.path.join(utils.get_dir(), args.data) if args.task == "CIFAR100cf": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = utils.CIFAR100C2F(root=datapath, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * len(indices))) orig_num_train = len(indices[:split]) orig_num_valid = len(indices[split:num_train]) train_indices = train_data.filter_by_fine(args.train_filter, indices[:split]) valid_indices = train_data.filter_by_fine(args.valid_filter, indices[split:num_train]) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=utils.FillingSubsetRandomSampler(train_indices, orig_num_train, reshuffle=True), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=utils.FillingSubsetRandomSampler(valid_indices, orig_num_valid, reshuffle=True), pin_memory=True, num_workers=2) # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter elif args.task == "CIFAR100split": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = utils.CIFAR100C2F(root=datapath, train=True, download=True, transform=train_transform) if not args.evensplit: train_indices, valid_indices = train_data.split(args.train_portion) else: num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_indices = indices[:split] valid_indices = indices[split:num_train] train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( train_indices), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( valid_indices), pin_memory=True, num_workers=2) else: if args.task == "CIFAR100": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = dset.CIFAR100(root=datapath, train=True, download=True, transform=train_transform) else: train_transform, valid_transform = utils._data_transforms_cifar10( args) train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = 
int(np.floor(args.train_portion * num_train)) train_indices = indices[:split] valid_indices = indices[split:num_train] train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( train_indices), pin_memory=True, num_workers=4) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( valid_indices), pin_memory=True, num_workers=4) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, int(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) loggers = { "train": { "loss": [], "acc": [], "step": [] }, "val": { "loss": [], "acc": [], "step": [] }, "infer": { "loss": [], "acc": [], "step": [] }, "ath": { "threshold": [], "step": [] }, "astep": [], "zustep": [] } alpha_threshold = args.init_alpha_threshold alpha_counter = 0 ewma = -1 for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_last_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) if args.ckpt_interval > 0 and epoch > 0 and ( epoch) % args.ckpt_interval == 0: logging.info('checkpointing genotype') os.mkdir(os.path.join(args.save, 'genotypes', str(epoch))) with open( os.path.join(args.save, 'genotypes', str(epoch), 'genotype.txt'), "w") as f: f.write(str(genotype)) print(model.activate(model.alphas_normal)) print(model.activate(model.alphas_reduce)) # training train_acc, train_obj, alpha_threshold, alpha_counter, ewma = train( train_queue, valid_queue, model, architect, criterion, optimizer, loggers, alpha_threshold, alpha_counter, ewma, args) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock) logging.info('valid_acc %f', valid_acc) utils.plot_loss_acc(loggers, args.save) utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'Normalalpha'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'Reducealpha'), steps=loggers["train"]["step"]) utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save, "FI", loggers["ath"], loggers['astep']) utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history, args.save, "FI_ewma", loggers["ath"], loggers['astep']) utils.save(model, os.path.join(args.save, 'weights.pt')) genotype = model.genotype() logging.info('genotype = %s', genotype) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close()
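# CIFAR100C2F.filter_by_fine above is a repo-specific helper. A generic sketch of the
# same idea, building a class-filtered index list for any torchvision dataset that
# exposes integer labels via .targets (keep_classes and base_indices are illustrative
# arguments):
def filter_indices_by_class(dataset, keep_classes, base_indices=None):
    base_indices = range(len(dataset)) if base_indices is None else base_indices
    keep = set(keep_classes)
    # The resulting list can be fed straight into SubsetRandomSampler.
    return [i for i in base_indices if dataset.targets[i] in keep]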
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) # Create tensorboard logger writer_dict = { 'writer': SummaryWriter(path_helper['log']), 'inner_steps': 0, 'val_steps': 0, 'valid_global_steps': 0 } # set tf env if args.eval: _init_inception() inception_path = check_or_download_inception(None) create_inception_graph(inception_path) # fid_stat if args.dataset.lower() == 'cifar10': fid_stat = 'fid_stat/fid_stats_cifar10_train.npz' elif args.dataset.lower() == 'stl10': fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz' elif args.dataset.lower() == 'mnist': fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz' else: raise NotImplementedError(f'no fid stat for {args.dataset.lower()}') assert os.path.exists(fid_stat) # initial fixed_z = torch.cuda.FloatTensor( np.random.normal(0, 1, (25, args.latent_dim))) FID_best = 1e+4 IS_best = 0. FID_best_epoch = 0 IS_best_epoch = 0 # build gen and dis gen = eval('model_search_gan.' + args.gen)(args) gen = gen.cuda() dis = eval('model_search_gan.' + args.dis)(args) dis = dis.cuda() logging.info("generator param size = %fMB", utils.count_parameters_in_MB(gen)) logging.info("discriminator param size = %fMB", utils.count_parameters_in_MB(dis)) if args.parallel: gen = nn.DataParallel(gen) dis = nn.DataParallel(dis) # resume training if args.load_path != '': gen.load_state_dict( torch.load( os.path.join(args.load_path, 'model', 'weights_gen_' + 'last' + '.pt'))) dis.load_state_dict( torch.load( os.path.join(args.load_path, 'model', 'weights_dis_' + 'last' + '.pt'))) # set optimizer for parameters W of gen and dis gen_optimizer = torch.optim.Adam( filter(lambda p: p.requires_grad, gen.parameters()), args.g_lr, (args.beta1, args.beta2)) dis_optimizer = torch.optim.Adam( filter(lambda p: p.requires_grad, dis.parameters()), args.d_lr, (args.beta1, args.beta2)) # set moving average parameters for generator gen_avg_param = copy_params(gen) img_size = 8 if args.grow else args.img_size train_transform, valid_transform = eval('utils.' + '_data_transforms_' + args.dataset + '_resize')(args, img_size) if args.dataset == 'cifar10': train_data = eval('dset.' + dataset[args.dataset])( root=args.data, train=True, download=True, transform=train_transform) elif args.dataset == 'stl10': train_data = eval('dset.' 
+ dataset[args.dataset])( root=args.data, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.gen_batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.gen_batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) logging.info('length of train_queue is {}'.format(len(train_queue))) logging.info('length of valid_queue is {}'.format(len(valid_queue))) max_iter = len(train_queue) * args.epochs scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( gen_optimizer, float(args.epochs), eta_min=args.learning_rate_min) gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0, max_iter * args.n_critic) dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0, max_iter * args.n_critic) architect = Architect_gen(gen, dis, args, 'duality_gap_with_mm', logging) gen.set_gumbel(args.use_gumbel) dis.set_gumbel(args.use_gumbel) for epoch in range(args.start_epoch + 1, args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) logging.info('epoch %d gen_lr %e', epoch, args.g_lr) logging.info('epoch %d dis_lr %e', epoch, args.d_lr) genotype_gen = gen.genotype() logging.info('gen_genotype = %s', genotype_gen) if 'Discriminator' not in args.dis: genotype_dis = dis.genotype() logging.info('dis_genotype = %s', genotype_dis) print('up_1: ', F.softmax(gen.alphas_up_1, dim=-1)) print('up_2: ', F.softmax(gen.alphas_up_2, dim=-1)) print('up_3: ', F.softmax(gen.alphas_up_3, dim=-1)) # determine whether use gumbel or not if epoch == args.fix_alphas_epochs + 1: gen.set_gumbel(args.use_gumbel) dis.set_gumbel(args.use_gumbel) # grow discriminator and generator if args.grow: dis.cur_stage = grow_ctrl(epoch, args.grow_epoch) gen.cur_stage = grow_ctrl(epoch, args.grow_epoch) if args.restrict_dis_grow and dis.cur_stage > 1: dis.cur_stage = 1 print('debug: dis.cur_stage is {}'.format(dis.cur_stage)) if epoch in args.grow_epoch: train_transform, valid_transform = utils._data_transforms_cifar10_resize( args, 2**(gen.cur_stage + 3)) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.gen_batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.gen_batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) else: gen.cur_stage = 2 dis.cur_stage = 2 # training parameters train_gan_parameter(args, train_queue, gen, dis, gen_optimizer, dis_optimizer, gen_avg_param, logging, writer_dict) # training alphas if epoch > args.fix_alphas_epochs: train_gan_alpha(args, train_queue, valid_queue, gen, dis, architect, gen_optimizer, gen_avg_param, epoch, lr, writer_dict, logging) # evaluate the IS and FID if args.eval and epoch % args.eval_every == 0: inception_score, std, fid_score = validate(args, fixed_z, fid_stat, gen, writer_dict, path_helper) logging.info('epoch {}: IS is {}+-{}, FID is {}'.format( 
epoch, inception_score, std, fid_score)) if inception_score > IS_best: IS_best = inception_score IS_best_epoch = epoch if fid_score < FID_best: FID_best = fid_score FID_best_epoch = epoch logging.info('best epoch {}: IS is {}'.format( IS_best_epoch, IS_best)) logging.info('best epoch {}: FID is {}'.format( FID_best_epoch, FID_best)) utils.save( gen, os.path.join(path_helper['model'], 'weights_gen_{}.pt'.format('last'))) utils.save( dis, os.path.join(path_helper['model'], 'weights_dis_{}.pt'.format('last'))) genotype_gen = gen.genotype() if 'Discriminator' not in args.dis: genotype_dis = dis.genotype() logging.info('best epoch {}: IS is {}'.format(IS_best_epoch, IS_best)) logging.info('best epoch {}: FID is {}'.format(FID_best_epoch, FID_best)) logging.info('final discovered gen_arch is {}'.format(genotype_gen)) if 'Discriminator' not in args.dis: logging.info('final discovered dis_arch is {}'.format(genotype_dis))
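# copy_params / gen_avg_param above maintain a moving average of the generator
# weights, a common stabilization trick for GAN training. A minimal sketch of that
# exponential moving average; the decay value is an assumption, not the repo's setting:
import torch

def copy_params(model):
    # Snapshot the current generator weights.
    return [p.data.clone() for p in model.parameters()]

def update_average(avg_params, model, beta=0.999):
    # Blend the snapshot towards the current weights after each generator update.
    with torch.no_grad():
        for avg_p, p in zip(avg_params, model.parameters()):
            avg_p.mul_(beta).add_(p.data, alpha=1.0 - beta)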
def main(): seed = args.seed np.random.seed(seed) cudnn.benchmark = True torch.manual_seed(seed) cudnn.enabled = True torch.cuda.manual_seed(seed) timestamp = str(utils.get_unix_timestamp()) path = os.path.join(args.save, timestamp) logger = utils.get_logger(args.save, timestamp, file_type='txt') tb_logger = tensorboardX.SummaryWriter('../runs/{}'.format(timestamp)) logger.info("time = %s, args = %s", str(utils.get_unix_timestamp()), args) train_data, test_data, input_shape = utils.get_data( args.data, args.observ_window, args.downsampling, args.multi_slice) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) test_queue = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) model = Network(input_shape, args.num_drones) model = model.to(device) criterion = nn.CrossEntropyLoss() criterion = criterion.to(device) optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) for epoch in range(args.epochs): lr = scheduler.get_lr()[0] logger.info('time = %s, epoch %d lr %e', str(utils.get_unix_timestamp()), epoch, lr) print('time = {}, epoch {} lr {}'.format( str(utils.get_unix_timestamp()), epoch, lr)) model.train() train_loss, train_acc = train(train_queue, model, criterion, optimizer, logger) logger.info('time = %s, train_loss %f train_acc %f', str(utils.get_unix_timestamp()), train_loss, train_acc) print('time = {}, train_loss {} train_acc {}'.format( str(utils.get_unix_timestamp()), train_loss, train_acc)) tb_logger.add_scalar("epoch_train_loss", train_loss, epoch) tb_logger.add_scalar("epoch_train_acc", train_acc, epoch) scheduler.step() model.eval() test_loss, test_acc = test(test_queue, model, criterion, logger) logger.info('time = %s, test_loss %f test_acc %f', str(utils.get_unix_timestamp()), test_loss, test_acc) print('time = {}, test_loss {} test_acc {}'.format( str(utils.get_unix_timestamp()), test_loss, test_acc)) tb_logger.add_scalar("epoch_test_loss", test_loss, epoch) tb_logger.add_scalar("epoch_test_acc", test_acc, epoch) utils.save(model, os.path.join(path, 'weights.pt'))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs)) for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
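# Like most of the scripts here, the loop above calls scheduler.step() at the top of
# each epoch, which was the pre-1.1 PyTorch idiom; in newer PyTorch the scheduler is
# expected to be stepped after the training epoch (i.e. after optimizer.step()),
# otherwise the first value of the schedule is effectively skipped and a warning is
# raised. A minimal sketch of the recommended ordering:
import torch
import torch.nn as nn

model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)

for epoch in range(50):
    lr = scheduler.get_last_lr()[0]   # current LR, for logging
    # ... run the training epoch here (optimizer.step() happens inside) ...
    scheduler.step()                  # advance the schedule once per epoch, afterwards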
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.MSELoss() criterion = criterion.cuda() model = Network(args.init_channels, 1, args.layers, criterion, input_channels=4) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) # dataset = utils.BathymetryDataset(args, "guyane/guyane.csv") # dataset.add(args, "saint_louis/saint_louis.csv") dataset = utils.BathymetryDataset(args, "../mixed_train.csv", to_filter=False) dataset.add(args, "../mixed_validation.csv", to_balance=False) trains, vals = dataset.get_subset_indices(args.train_portion) train_queue = torch.utils.data.DataLoader( dataset, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(trains), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( dataset, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(vals), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, int(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) loggers = {"train": {"loss": [], "step": []}, "val": {"loss": [], "step": []}, "infer": {"loss": [], "step": []}} for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_last_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training _ = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, loggers) # validation infer_loss = infer(valid_queue, model, criterion) utils.log_loss(loggers["infer"], infer_loss, None, model.clock) utils.plot_loss_acc(loggers, args.save) model.update_history() utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normal')) utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reduce')) utils.save(model, os.path.join(args.save, 'weights.pt')) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) np.save(os.path.join(os.path.join(args.save, 'normal_weight.npy')), F.softmax(model.alphas_normal, dim=-1).data.cpu().numpy()) np.save(os.path.join(os.path.join(args.save, 'reduce_weight.npy')), F.softmax(model.alphas_reduce, dim=-1).data.cpu().numpy()) genotype = model.genotype() logging.info('genotype = %s', genotype) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close()
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) datapath = os.path.join(utils.get_dir(), args.data) train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, int(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) loggers = { "train": { "loss": [], "acc": [], "step": [] }, "val": { "loss": [], "acc": [], "step": [] }, "infer": { "loss": [], "acc": [], "step": [] }, "ath": { "threshold": [], "step": [] }, "astep": [], "zustep": [] } for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_last_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, loggers) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'Normalalpha'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'Reducealpha'), steps=loggers["train"]["step"]) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) np.save(os.path.join(os.path.join(args.save, 'normal_weight.npy')), F.softmax(model.alphas_normal, dim=-1).data.cpu().numpy()) np.save(os.path.join(os.path.join(args.save, 'reduce_weight.npy')), F.softmax(model.alphas_reduce, dim=-1).data.cpu().numpy()) genotype = model.genotype() logging.info('genotype = %s', genotype) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close()
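# --- Editor's note (illustrative, assumed downstream usage) -------------------
# The search above ends by writing str(genotype) to genotype.txt. In DARTS-style
# repositories genotypes.Genotype is a namedtuple, so the file can be read back
# with eval() in an evaluation script, provided Genotype is in scope:
from genotypes import Genotype  # noqa: F401  (needed for eval below)
with open(os.path.join(args.save, 'genotype.txt')) as f:
    genotype = eval(f.read())
logging.info('restored genotype = %s', genotype)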
def main(): start_time = time.time() if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) random.seed(args.seed) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) logging.info('gpu device = {}'.format(args.gpu)) logging.info("args = {}".format(args)) #supernet model = model_maker(cell_nums=args.child_num_cells, out_filters=args.child_out_filters, normal_block_repeat=[4, 4], classes=args.num_class, aux=args.child_use_aux_heads) #generator controller = separable_LSTM(2) model.start_conv1 = nn.Sequential( conv2d_std(in_channels=3, out_channels=args.child_out_filters, kernel_size=3, stride=2), nn.BatchNorm2d(args.child_out_filters, track_running_stats=False), Mish(), conv2d_std(in_channels=args.child_out_filters, out_channels=args.child_out_filters, kernel_size=3, stride=2), nn.BatchNorm2d(args.child_out_filters, track_running_stats=False), Mish()) model.start_conv2 = nn.Sequential( conv2d_std(in_channels=3, out_channels=args.child_out_filters, kernel_size=3, stride=2), nn.BatchNorm2d(args.child_out_filters, track_running_stats=False), Mish(), conv2d_std(in_channels=args.child_out_filters, out_channels=args.child_out_filters, kernel_size=3, stride=2), nn.BatchNorm2d(args.child_out_filters, track_running_stats=False), Mish()) logging.info('Total params: {:.6f}M'.format( (sum(p.numel() for p in model.parameters()) / 1000000.0))) optimizer = torch.optim.SGD( model.parameters(), args.child_lr_max, momentum=args.momentum, weight_decay=args.weight_decay, ) #generator's optimizer controller_optimizer = Ranger( controller.parameters(), args.controller_lr, #betas=(0.1,0.999), #eps=1e-3, ) controller.cuda() model.cuda() train_loader, reward_loader, valid_loader = get_loaders(args) #utils.BatchNorm2d_replace(model) model.cuda() model.apply(utils.initialize_weights) parameters = utils.add_weight_decay(model, args.weight_decay) criterion = nn.CrossEntropyLoss( ) #utils.CrossEntropyLabelSmooth(num_classes = 10) model, optimizer = amp.initialize(model, optimizer, opt_level="O0") scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, args.epochs) #int(args.epochs*0.3)) lr = args.child_lr_max for epoch in tqdm(range(args.epochs)): training_start = time.time() logging.info('epoch {:0>3d} lr {:.6f}'.format(epoch, lr)) # training drop_prob = args.droppath * epoch / args.epochs model.drop_path_prob(drop_prob) starter = True if epoch == 0 else False train_acc = train(train_loader, model, controller, optimizer, criterion, start=starter) scheduler.step() lr = scheduler.get_lr()[-1] logging.info('train_acc {:.3f}'.format(train_acc)) train_controller(reward_loader, model, controller, controller_optimizer) # validation valid_acc = infer(valid_loader, model, controller, criterion) logging.info('valid_acc {:.3f}'.format(valid_acc)) if (epoch + 1) % args.report_freq == 0: utils.save(model, os.path.join(args.save, 'weights.pt')) utils.save(controller, os.path.join(args.save, 'controller.pt')) epoch_inter_time = int(time.time() - training_start) #print(f'Training 1 epoch, total time consumption {epoch_inter_time} s') print('Training 1 epoch, total time consumption {} s'.format( epoch_inter_time)) #logging.info(f'Training complete, total time consumption {int(time.time()-start_time)} s') logging.info('Training complete, total time consumption {} s'.format( int(time.time() - start_time)))
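# --- Editor's sketch ------------------------------------------------------------
# The start_conv blocks above use a Mish() activation. A minimal reference
# implementation (assumed to match the project's own Mish module),
# mish(x) = x * tanh(softplus(x)):
class Mish(nn.Module):
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))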
def main(): if not torch.cuda.is_available(): logging.info('No GPU device available') sys.exit(1) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info("args = %s", args) logging.info("unparsed args = %s", unparsed) # prepare dataset if args.is_cifar100: train_transform, valid_transform = utils._data_transforms_cifar100( args) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.is_cifar100: train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=False, transform=train_transform) valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=False, transform=valid_transform) else: train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=False, transform=train_transform) valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=False, transform=valid_transform) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers) # build Network criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() supernet = Network(args.init_channels, CIFAR_CLASSES, args.layers) supernet.cuda() if args.is_cifar100: weight_decay = 5e-4 else: weight_decay = 3e-4 optimizer = torch.optim.SGD( supernet.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=weight_decay, ) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) for epoch in range(args.epochs): logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0]) train_acc, train_obj = train(train_queue, supernet, criterion, optimizer) logging.info('train_acc %f', train_acc) valid_top1 = utils.AverageMeter() for i in range(args.eval_time): supernet.generate_share_alphas() ops_alps = supernet.cells[0].ops_alphas subnet = supernet.get_sub_net(ops_alps) valid_acc, valid_obj = infer(valid_queue, subnet, criterion) valid_top1.update(valid_acc) logging.info('Mean Valid Acc: %f', valid_top1.avg) scheduler.step() utils.save(supernet, os.path.join(args.save, 'supernet_weights.pt'))
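# --- Editor's sketch ------------------------------------------------------------
# utils.AverageMeter() above accumulates the accuracy of each randomly sampled
# subnet; a typical implementation of such a meter (the project's version may
# differ in detail) is:
class AverageMeter:
    def __init__(self):
        self.sum, self.cnt, self.avg = 0.0, 0, 0.0
    def update(self, val, n=1):
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt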
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') # sys.exit(1) else: # torch.cuda.set_device(args.gpu) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) # genotype = eval("genotypes.%s" % args.arch) # model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) model = Network(CIFAR_CLASSES) model = model.to(device) logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.to(device) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) pin_memory = True if torch.cuda.is_available() else False train_queue = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=pin_memory, num_workers=2) valid_queue = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=pin_memory, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs)) results = dict() for epoch in range(args.epochs): # scheduler.step() # in PyTorch 1.1+ this is called after optimizer.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) writer.add_scalar("train_acc", train_acc, epoch) results["train_acc"] = train_acc valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) writer.add_scalar("valid_acc", valid_acc, epoch) results["valid_acc"] = valid_acc utils.save(model, os.path.join(args.save, 'weights.pt')) scheduler.step() with mlflow.start_run() as run: # Log args into mlflow for key, value in vars(args).items(): mlflow.log_param(key, value) # Log results into mlflow for key, value in results.items(): mlflow.log_metric(key, value) # Log other info mlflow.log_param('loss_type', 'CrossEntropy') # Log model mlflow.pytorch.log_model(model, "model")
def main(): np.random.seed(args.seed) torch.cuda.set_device(device) cudnn.benchmark = True cudnn.enabled = True torch.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) train_transform, valid_transform = utils._data_transforms_cifar10(args) # Load dataset if args.dataset == 'cifar10': if args.gold_fraction == 0: train_data = CIFAR10(root=args.data, train=True, gold=False, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) else: train_data = CIFAR10(root=args.data, train=True, gold=True, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) num_classes = 10 elif args.dataset == 'cifar100': if args.gold_fraction == 0: train_data = CIFAR100(root=args.data, train=True, gold=False, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) else: train_data = CIFAR100(root=args.data, train=True, gold=True, gold_fraction=args.gold_fraction, corruption_prob=args.corruption_prob, corruption_type=args.corruption_type, transform=train_transform, download=True, seed=args.seed) valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform) num_classes = 100 genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_ch, num_classes, args.layers, args.auxiliary, genotype).cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) if args.loss_func == 'cce': criterion = nn.CrossEntropyLoss().to(device) elif args.loss_func == 'rll': criterion = utils.RobustLogLoss().to(device) else: assert False, "Invalid loss function '{}' given. Must be in {'cce', 'rll'}".format( args.loss_func) optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd, nesterov=True) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batchsz, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batchsz, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs)) for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc: %f', valid_acc) train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc: %f', train_acc) utils.save(model, os.path.join(args.save, 'trained.pt')) print('saved to: trained.pt')
def main(): utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py')) print(args) # Basic Setup np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) torch.cuda.set_device(2) cudnn.benchmark = True cudnn.enabled = True n_channels = 3 n_bins = 2.**args.n_bits approx_samples = 4 # Define model model_single = Network(n_channels, args.n_flow, args.n_block, n_bins, affine=args.affine, conv_lu=not args.no_lu) model = nn.DataParallel(model_single, device_ids=[2, 3]) model = model.to(device) optimizer = torch.optim.Adam(model.parameters(), args.learning_rate) dataset = iter(sample_cifar10(args.batch, args.img_size)) # Sample generated images z_sample = [] z_shapes = calc_z_shapes(n_channels, args.img_size, args.n_flow, args.n_block) for z in z_shapes: z_new = torch.randn(args.n_sample, *z) * args.temp z_sample.append(z_new.to(device)) with tqdm(range(args.iter)) as pbar: for i in pbar: # Training procedure model.train() # Get a random minibatch from the search queue with replacement input, _ = next(dataset) input = Variable(input, requires_grad=False).cuda(non_blocking=True) input = input.repeat(approx_samples, 1, 1, 1) log_p, logdet, _ = model(input + torch.rand_like(input) / n_bins) loss, _, _ = likelihood_loss(log_p, logdet, args.img_size, n_bins) loss_variance = likelihood_loss_variance(log_p, logdet, args.img_size, n_bins, approx_samples) loss = loss + loss_variance # Optimize model optimizer.zero_grad() loss.backward() optimizer.step() pbar.set_description("Loss: {}".format(loss.item())) # Save generated samples if i % 100 == 0: with torch.no_grad(): tvutils.save_image( model_single.reverse(z_sample).cpu().data, "{}/samples/{}.png".format(args.save, str(i + 1).zfill(6)), normalize=False, nrow=10, ) # Save checkpoint if i % 1000 == 0: model_single.genotype() torch.save( model.state_dict(), "{}/checkpoint/model_{}.pt".format(args.save, str(i + 1).zfill(6))) # Save latest weights utils.save(model, os.path.join(args.save, 'latest_weights.pt'))
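# --- Editor's sketch (assumption) ------------------------------------------------
# likelihood_loss(log_p, logdet, img_size, n_bins) above is assumed to follow the
# standard Glow bits-per-dimension objective; a self-contained version of that
# objective (the repository's function may differ) is:
import math

def likelihood_loss(log_p, logdet, image_size, n_bins, n_channels=3):
    n_pixel = image_size * image_size * n_channels
    # log-likelihood with dequantization correction, converted to bits/dim
    loss = -math.log(n_bins) * n_pixel + logdet + log_p
    scale = math.log(2) * n_pixel
    return (-loss / scale).mean(), (log_p / scale).mean(), (logdet / scale).mean()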
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) # Set random seeds. np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('args = %s', args) # Get data loaders. train_queue, valid_queue, num_classes = datasets.get_loaders( args, 'search') # Set up the network and criterion. model = Network(num_classes=num_classes, layers=args.layers, dataset=args.dataset) model = model.cuda() alpha = Alpha(num_normal=model.layers - len(model.channel_change_layers), num_reduce=len(model.channel_change_layers), num_op_normal=len(BLOCK_PRIMITIVES), num_op_reduce=len(REDUCTION_PRIMITIVES), gsm_soften_eps=args.gsm_soften_eps, gsm_temperature=args.gumbel_soft_temp, gsm_type=args.gsm_type, same_alpha_minibatch=args.same_alpha_minibatch) alpha = alpha.cuda() model = DDP(model, delay_allreduce=True) alpha = DDP(alpha, delay_allreduce=True) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() logging.info('param size = %fM ', utils.count_parameters_in_M(model)) writer.add_scalar('temperature', args.gumbel_soft_temp) # Get weight params, and arch params. weight_params = [p for p in model.parameters()] arch_params = [p for p in alpha.parameters()] logging.info('#Weight params: %d, #Arch params: %d' % (len(weight_params), len(arch_params))) # Initial weight pretraining. def run_train_init(): logging.info('running init epochs.') opt = torch.optim.Adam(weight_params, args.learning_rate, weight_decay=args.weight_decay) for e in range(args.init_epochs): # Shuffle the sampler. train_queue.sampler.set_epoch(e + args.seed) train_acc, train_obj = train_init(train_queue, model, alpha, criterion, opt, weight_params) logging.info('train_init_acc %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_init_acc %f', valid_acc) memory_cached, memory_alloc = utils.get_memory_usage(device=0) logging.info('memory_cached %0.3f memory_alloc %0.3f' % (memory_cached, memory_alloc)) if args.local_rank == 0: # if you are performing many searches, you can store the pretrained model. torch.save(model.module.state_dict(), args.pretrained_model) if args.init_epochs: # if you are performing many searches, you can store the pretrained model. if os.path.isfile(args.pretrained_model) and False: logging.info('loading pretrained model.') # load to cpu to avoid loading all params to GPU0 param = torch.load(args.pretrained_model, map_location='cpu') d = torch.device("cuda") model.module.load_state_dict(param, strict=False) model.to(d) else: run_train_init() # Set up network weights optimizer. optimizer = torch.optim.Adam(weight_params, args.learning_rate, weight_decay=args.weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) # Wrap model in UNAS nas = UNAS(model, alpha, args, writer, logging) global_step = 0 for epoch in range(args.epochs): # Shuffle the sampler, update lrs. train_queue.sampler.set_epoch(epoch + args.seed) scheduler.step() nas.arch_scheduler.step() # Logging. 
if args.local_rank == 0: memory_cached, memory_alloc = utils.get_memory_usage(device=0) writer.add_scalar('memory/cached', memory_cached, global_step) writer.add_scalar('memory/alloc', memory_alloc, global_step) logging.info('memory_cached %0.3f memory_alloc %0.3f' % (memory_cached, memory_alloc)) logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) writer.add_scalar('train/lr', scheduler.get_lr()[0], global_step) writer.add_scalar('train/arc_lr', nas.arch_scheduler.get_lr()[0], global_step) prob = F.softmax(alpha.module.alphas_normal, dim=-1) logging.info('alphas_normal:') logging.info(prob) fig = alpha.module.plot_alphas(BLOCK_PRIMITIVES, is_normal=True) writer.add_figure('weights/disp_normal', fig, global_step) prob = F.softmax(alpha.module.alphas_reduce, dim=-1) logging.info('alphas_reduce:') logging.info(prob) fig = alpha.module.plot_alphas(REDUCTION_PRIMITIVES, is_normal=False) writer.add_figure('weights/disp_reduce', fig, global_step) # Training. train_acc, train_obj, global_step = train(train_queue, valid_queue, model, alpha, nas, criterion, optimizer, global_step, weight_params, args.seed) logging.info('train_acc %f', train_acc) writer.add_scalar('train/acc', train_acc, global_step) # Validation. valid_queue.sampler.set_epoch(0) valid_acc, valid_obj = infer(valid_queue, model, alpha, criterion) logging.info('valid_acc %f', valid_acc) writer.add_scalar('val/acc', valid_acc, global_step) writer.add_scalar('val/loss', valid_obj, global_step) if args.local_rank == 0: logging.info('Saving the model and genotype.') utils.save(model, os.path.join(args.save, 'weights.pt')) torch.save( alpha.module.genotype(BLOCK_PRIMITIVES, REDUCTION_PRIMITIVES), os.path.join(args.save, 'genotype.pt')) writer.flush()
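# --- Editor's sketch (assumption) ------------------------------------------------
# utils.get_memory_usage(device=0) above is assumed to report reserved (cached)
# and allocated GPU memory in gigabytes; a minimal version using the public
# torch.cuda counters would be:
def get_memory_usage(device=0):
    cached = torch.cuda.memory_reserved(device) / 1e9
    alloc = torch.cuda.memory_allocated(device) / 1e9
    return cached, alloc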
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, k=args.k) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.dataset == 'cifar100': train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) architect = Architect(model, args) # configure progressive parameter epoch = 0 ks = [6, 4] num_keeps = [7, 4] train_epochs = [2, 2] if 'debug' in args.save else [25, 25] scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min) for i, current_epochs in enumerate(train_epochs): for e in range(current_epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) model.show_arch_parameters() # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, e) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) epoch += 1 scheduler.step() utils.save(model, os.path.join(args.save, 'weights.pt')) if not i == len(train_epochs) - 1: model.pruning(num_keeps[i + 1]) # architect.pruning([model.mask_normal, model.mask_reduce]) model.wider(ks[i + 1]) optimizer = configure_optimizer( optimizer, torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)) scheduler = configure_scheduler( scheduler, torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)) logging.info('pruning finish, %d ops left per edge', num_keeps[i + 1]) logging.info('network wider finish, current pc parameter %d', ks[i + 1]) genotype = model.genotype() logging.info('genotype = %s', genotype) model.show_arch_parameters()
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) random.seed(args.seed) torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.backends.cudnn.enabled = True logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) model = get_model(args.model_name) model.drop_path_prob = 0. macs, params = profile(model, inputs=(torch.randn(1, 3, 32, 32), )) macs, params = macs / 1000. / 1000., params / 1000. / 1000. logging.info("The parameter size is: {0}".format((params))) logging.info("The FLOPS is: {0}".format(macs)) model = torch.nn.DataParallel(model) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs)) best_acc = 0. for epoch in range(args.epochs): logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) scheduler.step() if best_acc < valid_acc: best_acc = valid_acc logging.info("Current best Prec@1 = %f", best_acc) utils.save(model, os.path.join(args.save, 'best.pt')) utils.save(model, os.path.join(args.save, 'weights.pt'))
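# --- Editor's sketch --------------------------------------------------------------
# model.drop_path_prob above is ramped linearly over training and is consumed by a
# drop_path helper inside the cells; the standard DARTS-style helper (assumed, not
# copied from this repository) drops a whole path per sample and rescales the rest:
def drop_path(x, drop_prob):
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        mask = torch.zeros(x.size(0), 1, 1, 1, device=x.device).bernoulli_(keep_prob)
        x = x.div(keep_prob) * mask
    return x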
def main(): # Select the search space to search in if args.search_space == '1': search_space = SearchSpace1() elif args.search_space == '2': search_space = SearchSpace2() elif args.search_space == '3': search_space = SearchSpace3() else: raise ValueError('Unknown search space') if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, output_weights=args.output_weights, steps=search_space.num_intermediate_nodes, search_space=search_space) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # Save the one shot model architecture weights for later analysis filehandler = open( os.path.join(args.save, 'one_shot_architecture_{}.obj'.format(epoch)), 'wb') numpy_tensor_list = [] for tensor in model.arch_parameters(): numpy_tensor_list.append(tensor.detach().cpu().numpy()) pickle.dump(numpy_tensor_list, filehandler) # Save the entire one-shot-model filepath = os.path.join(args.save, 'one_shot_model_{}.obj'.format(epoch)) torch.save(model.state_dict(), filepath) logging.info('architecture %s', numpy_tensor_list) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
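# --- Editor's note (illustrative) -------------------------------------------------
# The per-epoch one_shot_architecture_{epoch}.obj files written above can be read
# back for offline analysis; each file holds the list of numpy arrays taken from
# model.arch_parameters():
import pickle

with open(os.path.join(args.save, 'one_shot_architecture_0.obj'), 'rb') as f:
    arch_weights = pickle.load(f)
for w in arch_weights:
    print(w.shape)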
def main(): wandb.init( project="automl-gradient-based-nas", name="hw" + str(args.arch), config=args, entity="automl" ) wandb.config.update(args) # adds all of the arguments as config variables global is_multi_gpu if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) gpus = [int(i) for i in args.gpu.split(',')] logging.info('gpus = %s' % gpus) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %s' % args.gpu) logging.info("args = %s", args) genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) if len(gpus) > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs model = nn.DataParallel(model) is_multi_gpu = True model.cuda() weight_params = model.module.parameters() if is_multi_gpu else model.parameters() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) wandb.run.summary["param_size"] = utils.count_parameters_in_MB(model) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD( weight_params, # model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay ) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs)) best_accuracy = 0 for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) wandb.log({"evaluation_train_acc": train_acc, 'epoch': epoch}) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) wandb.log({"evaluation_valid_acc": valid_acc, 'epoch': epoch}) if valid_acc > best_accuracy: wandb.run.summary["best_valid_accuracy"] = valid_acc wandb.run.summary["epoch_of_best_accuracy"] = epoch best_accuracy = valid_acc utils.save(model, os.path.join(wandb.run.dir, 'weights-best.pt')) utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
def main(): if not torch.cuda.is_available(): logging.info('No GPU device available') sys.exit(1) np.random.seed(args.super_seed) cudnn.benchmark = True torch.manual_seed(args.super_seed) cudnn.enabled = True torch.cuda.manual_seed(args.super_seed) logging.info("args = %s", args) logging.info("unparsed args = %s", unparsed) # prepare dataset if args.cifar100: train_transform, valid_transform = utils._data_transforms_cifar100(args) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.cifar100: train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform) else: train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers, drop_last=True) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers, drop_last=True) ood_queues = {} for k in ['svhn', 'lsun_resized', 'imnet_resized']: ood_path = os.path.join(args.ood_dir, k) dset_ = dset.ImageFolder(ood_path, valid_transform) loader = torch.utils.data.DataLoader( dset_, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers ) ood_queues[k] = loader # build Network criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() supernet = Network( args.init_channels, CIFAR_CLASSES, args.layers, combine_method=args.feat_comb, is_cosine=args.is_cosine, ) supernet.cuda() supernet.generate_share_alphas() # This prevents the supernet alpha attribute from being None alphas_path = './results/{}/eval_out/{}/alphas.pt'.format(args.load_at.split('/')[2], args.folder) logging.info('Loading alphas at: %s' % alphas_path) alphas = torch.load(alphas_path) subnet = supernet.get_sub_net(alphas[:, :-1]) logging.info(alphas) if args.cifar100: weight_decay = 5e-4 else: weight_decay = 3e-4 optimizer = torch.optim.SGD( subnet.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=weight_decay, ) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.learning_rate_min) for epoch in range(args.epochs): logging.info('epoch {} lr {:.4f}'.format(epoch, scheduler.get_last_lr()[0])) train_acc, _ = train(train_queue, subnet, criterion, optimizer) logging.info('train_acc {:.2f}'.format(train_acc)) valid_acc, valid_loss = infer(valid_queue, subnet, criterion) writer_va.add_scalar('loss', valid_loss, global_step) writer_va.add_scalar('acc', valid_acc, global_step) logging.info('valid_acc {:.2f}'.format(valid_acc)) scheduler.step() if not os.path.exists(args.ckpt_path): os.makedirs(args.ckpt_path) utils.save(subnet, os.path.join(args.ckpt_path, 'subnet_{}_weights.pt'.format(args.folder))) lg_aucs, sm_aucs, ent_aucs = ood_eval(valid_queue, ood_queues, subnet, criterion) logging.info('Writing results:') out_dir = './results/{}/eval_out/{}/'.format(args.load_at.split('/')[2], args.folder) with open(os.path.join(out_dir, 'subnet_scratch.txt'), 'w') as f: f.write('-'.join([str(valid_acc), str(lg_aucs), str(sm_aucs), str(ent_aucs)]))
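# --- Editor's sketch (assumption) --------------------------------------------------
# ood_eval() above returns three AUROC-style scores (logit-, softmax- and
# entropy-based). The core computation is assumed to be a standard AUROC over
# in-distribution vs. OOD confidence scores, e.g. with scikit-learn; scores_in and
# scores_ood are 1-D numpy arrays where higher means "more in-distribution":
from sklearn.metrics import roc_auc_score

def auroc(scores_in, scores_ood):
    y_true = np.concatenate([np.ones_like(scores_in), np.zeros_like(scores_ood)])
    y_score = np.concatenate([scores_in, scores_ood])
    return roc_auc_score(y_true, y_score)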
def train_search(gpu,args): print('START TRAIN') # Setting random seed print("Setting random seed",args.seed) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled=True torch.cuda.manual_seed(args.seed) torch.cuda.set_device(gpu) num_gpu = len([int(i) for i in args.gpu.split(',')]) rank = args.nr * num_gpu + gpu dist.init_process_group(backend= 'nccl', init_method='env://', world_size=args.world_size, rank=rank) # loss function criterion = nn.CrossEntropyLoss() criterion = criterion.cuda(gpu) # Initialize the model: build a supernet and place it on the GPU model = Network(args.init_channels, args.CIFAR_CLASSES, args.layers, criterion) model = model.cuda(gpu) arch_params = list(map(id, model.arch_parameters())) weight_params = filter(lambda p: id(p) not in arch_params, # currently unused model.parameters()) logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( model.parameters(), # parameters updated by the optimizer # weight_params, args.learning_rate, # learning rate momentum=args.momentum, weight_decay=args.weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) # dset is shorthand for torchvision.datasets ''' # FIXME: with DistributedDataParallel it does not seem possible to split the dataset by passing indices directly num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) # print("Using multiple workers for the dataloader raises an error!") # Split the dataset into training and validation sets and wrap them in ordered loaders ''' train_sampler = torch.utils.data.distributed.DistributedSampler(train_data, num_replicas= args.world_size, rank= rank) train_queue = torch.utils.data.DataLoader( dataset= train_data, batch_size= args.batch_size, shuffle= False, sampler= train_sampler, pin_memory= True, num_workers= 0) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) # valid_sampler = torch.utils.data.distributed.DistributedSampler( # valid_data, # num_replicas= args.world_size, # rank= rank # ) valid_queue = torch.utils.data.DataLoader( dataset= valid_data, batch_size=args.batch_size, pin_memory=True, num_workers=0) ''' # FIXME: train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=0) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=0) ''' # Create the architecture parameters and their update step inside Architect architect = Architect(model, criterion, args) # defined in a dedicated architect.py; passed to train() model = nn.parallel.DistributedDataParallel(model,device_ids=[gpu]) for epoch in range(args.epochs): lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.module.genotype() # provided by the searchable Network class in model_search.py logging.info('genotype = %s', genotype) # log the cell structure for the current epoch print(F.softmax(model.module.alphas_normal, dim=-1)) print(F.softmax(model.module.alphas_reduce, dim=-1)) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, args, gpu) logging.info('train_acc %f', train_acc) # validation with torch.no_grad(): valid_acc, valid_obj = infer(valid_queue, model.module, criterion, args, gpu) logging.info('valid_acc %f', valid_acc) scheduler.step() if gpu == 0: utils.save(model.module, os.path.join(args.save, 'weights.pt'))
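# --- Editor's sketch (assumption) --------------------------------------------------
# train_search(gpu, args) above joins a NCCL process group through
# init_method='env://', so the surrounding script (not shown here) must export the
# rendezvous variables and spawn one worker per GPU. A minimal launcher, with
# args.nodes as a hypothetical node-count flag, could be:
import torch.multiprocessing as mp

if __name__ == '__main__':
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    num_gpu = len(args.gpu.split(','))
    args.world_size = num_gpu * args.nodes  # hypothetical flag: total process count
    mp.spawn(train_search, nprocs=num_gpu, args=(args,))  # calls train_search(gpu, args)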