def __init__(self, num_cells, num_class=10, input_size=None, lr=0.025, lr_a=3e-4,
             lr_min=0.001, momentum=0.9, weight_decay=3e-4, weight_decay_a=1e-3,
             grad_clip=5, unrolled=False, device='cuda:0', writer=None,
             exp_name=None, save_name='EXP', args=None):
    self.num_cells = num_cells
    self.num_classes = num_class
    self.input_size = input_size
    self.device = device
    self.writer = writer
    self.exp_name = exp_name
    self.lr = lr
    self.lr_a = lr_a
    self.lr_min = lr_min
    self.momentum = momentum
    self.weight_decay = weight_decay
    # command-line arguments override the defaults in search mode
    if args.mode == 'search':
        self.lr = args.c_lr
        self.lr_a = args.c_lr_a
        self.weight_decay = args.c_lamb
        self.c_epochs = args.c_epochs
        self.c_batch = args.c_batch
    self.grad_clip = grad_clip
    self.save_name = save_name
    self.criterion = nn.CrossEntropyLoss().to(device)
    self.model = BasicNetwork(self.input_size[0], 16, self.num_classes, self.num_cells,
                              self.criterion, device=self.device).to(device)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(self.model))
    self.optimizer = torch.optim.SGD(params=self.model.parameters(), lr=self.lr,
                                     momentum=self.momentum, weight_decay=self.weight_decay)
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # recover the genotype either from a raw genotype dump, from a named entry
    # in genotypes.py, or fall back to the BATH genotype
    genotype_path = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
        genotype = eval(geno_raw)
    else:
        genoname = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genoname.txt')
        if os.path.isfile(genoname):
            with open(genoname, "r") as f:
                args.arch = f.read()
            genotype = eval("genotypes.%s" % args.arch)
        else:
            genotype = eval("genotypes.BATH")

    model = Network(args.init_channels, 1, args.layers, args.auxiliary, genotype, input_channels=4)
    model = model.cuda()
    print(os.path.join(utils.get_dir(), args.model_path))
    utils.load(model, os.path.join(utils.get_dir(), args.model_path))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.MSELoss()
    criterion = criterion.cuda()

    test_data_tne = utils.BathymetryDataset(args, "../29TNE.csv",
                                            root_dir="dataset/bathymetry/29TNE/dataset_29TNE",
                                            to_trim="/tmp/pbs.6233542.admin01/tmp_portugal/",
                                            to_filter=False)
    test_queue_tne = torch.utils.data.DataLoader(
        test_data_tne, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_obj, targets, preds = infer(test_queue_tne, model, criterion, args.depth_normalization)
    logging.info('test_obj tne %f', test_obj)
    test_data_tne.write_results(targets, preds, os.path.join(args.save, 'tne_results.csv'))

    test_data_smd = utils.BathymetryDataset(args, "../29SMD.csv",
                                            root_dir="dataset/bathymetry/29SMD/dataset_29SMD",
                                            to_trim="/tmp/pbs.6233565.admin01/tmp_portugal/",
                                            to_filter=False)
    test_queue_smd = torch.utils.data.DataLoader(
        test_data_smd, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    test_obj, targets, preds = infer(test_queue_smd, model, criterion, args.depth_normalization)
    logging.info('test_obj smd %f', test_obj)
    test_data_smd.write_results(targets, preds, os.path.join(args.save, 'smd_results.csv'))
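# The `infer` helper is not shown in this file. A minimal sketch of what the
# call sites above appear to assume: the signature and the (loss, targets,
# preds) return triple are inferred from usage, and `depth_normalization` is
# assumed to be a scalar that rescales model outputs back to physical units.
def infer(queue, model, criterion, depth_normalization=1.0):
    model.eval()
    total_loss, n = 0.0, 0
    targets, preds = [], []
    with torch.no_grad():
        for x, y in queue:
            x, y = x.cuda(), y.cuda()
            logits, _ = model(x)  # DARTS-style networks return (logits, aux); an assumption here
            loss = criterion(logits, y)
            total_loss += loss.item() * x.size(0)
            n += x.size(0)
            targets.extend((y * depth_normalization).cpu().tolist())
            preds.extend((logits * depth_normalization).cpu().tolist())
    return total_loss / n, targets, preds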
def run(self):
    args = self.args
    utils = project_utils
    criterion = nn.CrossEntropyLoss().cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()
    train_queue, valid_queue, test_queue = self.load_dataset()

    model = DARTSWSNetwork(args.init_channels, 10, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(torch.softmax(model.alphas_normal, dim=-1))
        print(torch.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = self.train_fn(train_queue, valid_queue, model,
                                             architect=architect, criterion=criterion,
                                             optimizer=optimizer, lr=lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = self.eval_fn(test_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save_json(genotype, os.path.join(self.exp_dir, 'arch_pool.json'))
        if epoch % 30 == 0:
            shutil.copy(os.path.join(self.exp_dir, 'arch_pool.json'),
                        os.path.join(self.exp_dir, 'arch_pool') + '.{}.json'.format(epoch))
        darts_utils.save(model, os.path.join(self.exp_dir, 'weights.pt'))

    # best chosen dag
    bestdag = model.genotype()
    return -1, bestdag
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    if args.gpu == -1:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:{}'.format(args.gpu))
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, dataset_classes, args.layers, args.auxiliary, genotype)
    model = model.to(device)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    train_data = MyDataset(args=args, subset='train')
    valid_data = MyDataset(args=args, subset='valid')
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj, train_fscores, train_MIoU = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f _fscores %f _MIoU %f', train_acc, train_fscores, train_MIoU)

        valid_acc, valid_obj, valid_fscores, valid_MIoU = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f _fscores %f _MIoU %f', valid_acc, valid_fscores, valid_MIoU)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    model.load_state_dict(torch.load(args.model_path)['state_dict'])
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    validdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    valid_data = dset.ImageFolder(
        validdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    model.drop_path_prob = args.drop_path_prob
    valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc_top1 %f', valid_acc_top1)
    logging.info('valid_acc_top5 %f', valid_acc_top5)
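# `infer` is defined elsewhere in this script. A minimal sketch of the usual
# DARTS-style evaluation loop it is assumed to implement; utils.AvgrageMeter
# and utils.accuracy are the standard DARTS helpers, assumed available here.
def infer(valid_queue, model, criterion):
    objs, top1, top5 = utils.AvgrageMeter(), utils.AvgrageMeter(), utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for input, target in valid_queue:
            input, target = input.cuda(), target.cuda(non_blocking=True)
            logits, _ = model(input)
            loss = criterion(logits, target)
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
    return top1.avg, top5.avg, objs.avg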
def initialize_model(self):
    """
    Initialize model, may change across different model.
    :return:
    """
    args = self.args
    if self.args.search_space == 'nasbench':
        self.model_fn = NasBenchNetSearchDarts
        self.fixmodel_fn = NasBenchNet
        model = self.model_fn(args)
        utils = darts_nasbench_utils
    else:
        raise NotImplementedError("Not supported")

    # finalize model update
    if args.gpus > 0:
        if self.args.gpus == 1:
            model = model.cuda()
            self.parallel_model = model
        else:
            self.model = model
            self.parallel_model = nn.DataParallel(self.model).cuda()
            # IPython.embed(header='checking replicas and others.')
    else:
        self.parallel_model = model

    darts = DartsArchitect(model, args=args)
    model = self.parallel_model
    # logging.info("DARTS param size = %fMB", utils.count_parameters_in_MB(darts))
    self.train_fn = partial(darts_train_model, args=args, architect=darts, sampler=None)
    self.eval_fn = partial(darts_model_validation, args=args, verbose=True)
    self.controller = darts

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )
    # scheduler as Cosine.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), args.learning_rate_min)
    return model, optimizer, scheduler, darts, None
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)

    # profile FLOPs with drop-path disabled
    model.drop_path_prob = args.drop_path_prob * 0 / args.epochs
    flops, params = profile(model, inputs=(torch.randn(1, 3, 32, 32),), verbose=False)
    logging.info('flops = %fM', flops / 1e6)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    model = model.cuda()
    utils.load(model, args.model_path)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    _, test_transform = utils._data_transforms_cifar10(args)
    test_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=test_transform)
    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    with torch.no_grad():
        test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
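# `utils.load` above restores weights from a raw state-dict file; in the
# original DARTS utils it is essentially a one-liner. Reproduced as a sketch
# for reference (the map_location argument is an addition for CPU-only use):
def load(model, model_path):
    model.load_state_dict(torch.load(model_path, map_location='cpu'))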
def build_cifar100(model_state_dict, optimizer_state_dict, **kwargs):
    epoch = kwargs.pop('epoch')

    train_transform, valid_transform = utils._data_transforms_cifar10(args.cutout_size)
    train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=16)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.eval_batch_size, shuffle=False, pin_memory=True, num_workers=16)

    model = NASNetworkCIFAR(args, 100, args.layers, args.nodes, args.channels,
                            args.keep_prob, args.drop_path_keep_prob,
                            args.use_aux_head, args.steps, args.arch)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    logging.info("multi adds = %fM", model.multi_adds / 1000000)
    if model_state_dict is not None:
        model.load_state_dict(model_state_dict)

    if torch.cuda.device_count() > 1:
        logging.info("Use %d %s", torch.cuda.device_count(), "GPUs !")
        model = nn.DataParallel(model)
    model = model.cuda()

    train_criterion = nn.CrossEntropyLoss().cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.lr_max,
        momentum=0.9,
        weight_decay=args.l2_reg,
    )
    if optimizer_state_dict is not None:
        optimizer.load_state_dict(optimizer_state_dict)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), args.lr_min, epoch)
    return train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler
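# A hedged usage sketch for the builder above: on a fresh run, pass None for
# both state dicts and epoch=-1 so CosineAnnealingLR starts from scratch.
train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler = \
    build_cifar100(None, None, epoch=-1)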
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU found!')
        sys.exit(1)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = False
    cudnn.deterministic = True
    logging.info("Args = %s", args)

    nao = NAO(
        args.encoder_layers,
        args.encoder_vocab_size,
        args.encoder_hidden_size,
        args.encoder_dropout,
        args.encoder_length,
        args.source_length,
        args.encoder_emb_size,
        args.mlp_layers,
        args.mlp_hidden_size,
        args.mlp_dropout,
        args.decoder_layers,
        args.decoder_vocab_size,
        args.decoder_hidden_size,
        args.decoder_dropout,
        args.decoder_length,
    )
    logging.info("param size = %fMB", utils.count_parameters_in_MB(nao))
    nao = nao.cuda()

    # load the current architecture pool and its measured validation accuracies
    with open(os.path.join(args.output_dir, 'arch_pool.{}'.format(args.iteration))) as f:
        arch_pool = f.read().splitlines()
        arch_pool = list(map(utils.build_dag, arch_pool))
    with open(os.path.join(args.output_dir, 'arch_pool.{}.perf'.format(args.iteration))) as f:
        arch_pool_valid_acc = f.read().splitlines()
        arch_pool_valid_acc = list(map(float, arch_pool_valid_acc))

    logging.info('Training Encoder-Predictor-Decoder')
    train_encoder_input = list(
        map(lambda x: utils.parse_arch_to_seq(x[0], 2) + utils.parse_arch_to_seq(x[1], 2),
            arch_pool))
    # min-max normalize the accuracies to [0, 1] as regression targets
    min_val = min(arch_pool_valid_acc)
    max_val = max(arch_pool_valid_acc)
    train_encoder_target = [(i - min_val) / (max_val - min_val) for i in arch_pool_valid_acc]

    if args.expand is not None:
        buffer1, buffer2 = [], []
        for _ in range(args.expand - 1):
            for src, tgt in zip(train_encoder_input, train_encoder_target):
                # augment by swapping the two (input, op) token pairs of one node
                # in the normal cell (first 20 tokens) and one in the reduction cell
                a = np.random.randint(0, 5)
                b = np.random.randint(0, 5)
                src = src[:4 * a] + src[4 * a + 2:4 * a + 4] + \
                      src[4 * a:4 * a + 2] + src[4 * (a + 1):20 + 4 * b] + \
                      src[20 + 4 * b + 2:20 + 4 * b + 4] + src[20 + 4 * b:20 + 4 * b + 2] + \
                      src[20 + 4 * (b + 1):]
                buffer1.append(src)
                buffer2.append(tgt)
        train_encoder_input += buffer1
        train_encoder_target += buffer2

    nao_train_dataset = utils.NAODataset(train_encoder_input, train_encoder_target, True, swap=True)
    nao_valid_dataset = utils.NAODataset(train_encoder_input, train_encoder_target, False)
    nao_train_queue = torch.utils.data.DataLoader(
        nao_train_dataset, batch_size=args.batch_size, shuffle=True, pin_memory=True)
    nao_valid_queue = torch.utils.data.DataLoader(
        nao_valid_dataset, batch_size=len(nao_valid_dataset), shuffle=False, pin_memory=True)
    nao_optimizer = torch.optim.Adam(nao.parameters(), lr=args.lr, weight_decay=args.l2_reg)

    for nao_epoch in range(1, args.epochs + 1):
        nao_loss, nao_mse, nao_ce = nao_train(nao_train_queue, nao, nao_optimizer)
        if nao_epoch % 10 == 0 or nao_epoch == 1:
            logging.info("epoch %04d train loss %.6f mse %.6f ce %.6f",
                         nao_epoch, nao_loss, nao_mse, nao_ce)
        if nao_epoch % 100 == 0 or nao_epoch == 1:
            mse, pa, hs = nao_valid(nao_train_queue, nao)
            logging.info("Evaluation on train data")
            logging.info('epoch %04d mse %.6f pairwise accuracy %.6f hamming distance %.6f',
                         nao_epoch, mse, pa, hs)
            mse, pa, hs = nao_valid(nao_valid_queue, nao)
            logging.info("Evaluation on valid data")
            logging.info('epoch %04d mse %.6f pairwise accuracy %.6f hamming distance %.6f',
                         nao_epoch, mse, pa, hs)

    new_archs = []
    predict_step_size = 0
    top_archs = list(
        map(lambda x: utils.parse_arch_to_seq(x[0], 2) + utils.parse_arch_to_seq(x[1], 2),
            arch_pool[:args.generate_topk]))
    nao_infer_dataset = utils.NAODataset(top_archs, None, False)
    nao_infer_queue = torch.utils.data.DataLoader(
        nao_infer_dataset, batch_size=len(nao_infer_dataset), shuffle=False, pin_memory=True)

    while len(new_archs) < args.new_arch:
        predict_step_size += 1
        logging.info('Generate new architectures with step size %d', predict_step_size)
        new_arch = nao_infer(nao_infer_queue, nao, predict_step_size, direction='+')
        for arch in new_arch:
            if arch not in train_encoder_input and arch not in new_archs:
                new_archs.append(arch)
            if len(new_archs) >= args.new_arch:
                break
        logging.info('%d new archs generated now', len(new_archs))
        if predict_step_size > args.max_step_size:
            break

    logging.info("Generate %d new archs", len(new_archs))
    new_arch_pool = list(map(lambda x: utils.parse_seq_to_arch(x, 2), new_archs))
    new_arch_pool = new_arch_pool + arch_pool[:args.remain_topk]
    with open(os.path.join(args.output_dir, 'new_arch_pool.{}'.format(args.iteration)), 'w') as f:
        for arch in new_arch_pool:
            arch = ' '.join(map(str, arch[0] + arch[1]))
            f.write('{}\n'.format(arch))
    logging.info('Finish training!')
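# A self-contained sketch of the swap augmentation used in the `args.expand`
# branch above, under the assumption that each encoded architecture is a
# 40-token sequence: 20 tokens per cell, 5 nodes per cell, 4 tokens per node
# (two (input, op) pairs of 2 tokens each). Swapping a node's two pairs yields
# a different sequence encoding the same architecture, so the regression
# target can be reused unchanged. `swap_pairs` is an illustrative name.
import numpy as np

def swap_pairs(seq, a, b):
    # swap the two (input, op) halves of node `a` in the normal cell
    # and of node `b` in the reduction cell
    return (seq[:4 * a] + seq[4 * a + 2:4 * a + 4] + seq[4 * a:4 * a + 2] +
            seq[4 * (a + 1):20 + 4 * b] + seq[20 + 4 * b + 2:20 + 4 * b + 4] +
            seq[20 + 4 * b:20 + 4 * b + 2] + seq[20 + 4 * (b + 1):])

seq = list(range(40))  # stand-in for an encoded architecture
aug = swap_pairs(seq, np.random.randint(0, 5), np.random.randint(0, 5))
assert len(aug) == 40 and sorted(aug) == sorted(seq)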
def compute(self, x, budget, config, **kwargs):
    """
    Get model with hyperparameters from config generated by get_configspace()
    """
    config = get_config_dictionary(x, config)
    print("config", config)
    if len(config.keys()) < len(x):
        return 100
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    gpu = 'cuda:0'
    np.random.seed(self.seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(self.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(self.seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    genotype = eval("genotypes.%s" % 'PCDARTS')
    model = Network(self.init_channels, self.n_classes, config['n_conv_layers'], genotype)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=config['initial_lr'],
                                    momentum=0.9, weight_decay=config['weight_decay'],
                                    nesterov=True)
    else:
        optimizer = settings.opti_dict[config['optimizer']](model.parameters(),
                                                            lr=config['initial_lr'])

    if config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, int(budget))
    elif config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    indices = list(range(int(self.split * len(self.train_dataset))))
    valid_indices = list(range(int(self.split * len(self.train_dataset)), len(self.train_dataset)))
    print("Training size=", len(indices))
    training_sampler = SubsetRandomSampler(indices)
    valid_sampler = SubsetRandomSampler(valid_indices)
    train_queue = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                              batch_size=self.batch_size,
                                              sampler=training_sampler)
    valid_queue = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                              batch_size=self.batch_size,
                                              sampler=valid_sampler)

    for epoch in range(int(budget)):
        lr_scheduler.step()
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        model.drop_path_prob = config['drop_path_prob'] * epoch / int(budget)

        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     grad_clip=config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

    return valid_obj  # Hyperband always minimizes, so we want to minimise the error, error = 1 - acc
def main():
    start = time.time()
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    torch.cuda.set_device(config.local_rank % len(config.gpus))
    device = torch.device('cuda', config.local_rank % len(config.gpus))
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    config.world_size = torch.distributed.get_world_size()
    config.total_batch = config.world_size * config.batch_size

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    CLASSES = 1000
    num_training_samples = 1281167
    num_batches = num_training_samples // config.batch_size
    model_name = config.arch
    if config.epoch_start_cs != -1:
        config.use_all_channels = True

    ### Model
    if model_name == 'ShuffleNas_fixArch':
        architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
        scale_ids = [6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3]
        # we rescale the channels uniformly to adjust the FLOPs.
        model = get_shufflenas_oneshot(
            architecture, scale_ids,
            use_se=config.use_se,
            n_class=CLASSES,
            last_conv_after_pooling=config.last_conv_after_pooling,
            channels_layout=config.channels_layout)
    elif model_name == 'ShuffleNas':
        model = get_shufflenas_oneshot(
            use_all_blocks=config.use_all_blocks,
            use_se=config.use_se,
            n_class=CLASSES,
            last_conv_after_pooling=config.last_conv_after_pooling,
            channels_layout=config.channels_layout)
    else:
        raise NotImplementedError

    model = model.to(device)
    # model.apply(utils.weights_init)
    # model = DDP(model, delay_allreduce=True)
    # Workaround: a custom loss cannot use model.parameters() on an apex-wrapped
    # model, see https://github.com/NVIDIA/apex/issues/457 and
    # https://github.com/NVIDIA/apex/issues/107
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[config.local_rank], output_device=config.local_rank)
    if model_name == 'ShuffleNas_fixArch':
        logger.info("param size = %fMB", utils.count_parameters_in_MB(model))
    else:
        logger.info("Train Supernet")

    # Loss
    if config.label_smoothing:
        criterion = CrossEntropyLabelSmooth(CLASSES, config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    weight = model.parameters()

    # Optimizer
    w_optimizer = torch.optim.SGD(weight, config.w_lr,
                                  momentum=config.w_momentum,
                                  weight_decay=config.w_weight_decay)
    w_schedule = utils.Schedule(w_optimizer)

    # use soft link `mkdir ./data/imagenet && ln -s /googol/atlas/public/cv/ILSVRC/Data/CLS-LOC/* ./data/imagenet/`
    train_data = get_imagenet_iter_torch(
        type='train',
        image_dir=os.path.join(config.data_path, config.dataset.lower()),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        local_rank=config.local_rank,
        crop=224,
        device_id=config.local_rank,
        num_gpus=config.gpus,
        portion=config.train_portion)
    valid_data = get_imagenet_iter_torch(
        type='train',
        image_dir=os.path.join(config.data_path, config.dataset.lower()),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        local_rank=config.local_rank,
        crop=224,
        device_id=config.local_rank,
        num_gpus=config.gpus,
        portion=config.val_portion)

    best_top1 = 0.
    for epoch in range(config.epochs):
        if epoch < config.warmup_epochs:
            lr = w_schedule.update_schedule_linear(epoch, config.w_lr,
                                                   config.w_weight_decay,
                                                   config.batch_size)
        else:
            w_scheduler = w_schedule.get_schedule_cosine(config.w_lr_min, config.epochs)
            w_scheduler.step()
            lr = w_scheduler.get_lr()[0]
        logger.info('epoch %d lr %e', epoch, lr)
        if epoch > config.epoch_start_cs:
            config.use_all_channels = False

        # training
        train_top1, train_loss = train(train_data, valid_data, model, criterion,
                                       w_optimizer, lr, epoch, writer, model_name)
        logger.info('Train top1 %f', train_top1)

        # validation
        top1 = 0
        if epoch % 10 == 0:
            top1, loss = infer(valid_data, model, epoch, criterion, writer, model_name)
            logger.info('valid top1 %f', top1)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    utils.time(time.time() - start)
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
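# `utils.Schedule` is project-specific and not shown here. A minimal sketch of
# the warmup behaviour the loop above appears to rely on: a linear ramp of the
# learning rate during the warmup epochs, then a cosine schedule. The class and
# method names mirror the call sites, but the bodies are assumptions (the real
# implementation presumably reads warmup_epochs from config).
import torch

class Schedule:
    def __init__(self, optimizer):
        self.optimizer = optimizer

    def update_schedule_linear(self, epoch, base_lr, weight_decay, batch_size,
                               warmup_epochs=5):
        # ramp lr linearly from ~0 up to base_lr across the warmup epochs
        lr = base_lr * (epoch + 1) / warmup_epochs
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        return lr

    def get_schedule_cosine(self, lr_min, epochs):
        return torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, float(epochs), eta_min=lr_min)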
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.MSELoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, 1, args.layers, criterion, input_channels=4)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # dataset = utils.BathymetryDataset(args, "guyane/guyane.csv")
    # dataset.add(args, "saint_louis/saint_louis.csv")
    dataset = utils.BathymetryDataset(args, "../mixed_train.csv", to_filter=False)
    dataset.add(args, "../mixed_validation.csv", to_balance=False)
    trains, vals = dataset.get_subset_indices(args.train_portion)

    train_queue = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(trains),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(vals),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    loggers = {"train": {"loss": [], "step": []},
               "val": {"loss": [], "step": []},
               "infer": {"loss": [], "step": []}}

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        _ = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, loggers)

        # validation
        infer_loss = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], infer_loss, None, model.clock)

        utils.plot_loss_acc(loggers, args.save)
        model.update_history()
        utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reduce'))
        utils.save(model, os.path.join(args.save, 'weights.pt'))

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))
    np.save(os.path.join(args.save, 'normal_weight.npy'),
            F.softmax(model.alphas_normal, dim=-1).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            F.softmax(model.alphas_reduce, dim=-1).data.cpu().numpy())

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
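# The genotype file written above is read back by the evaluation scripts in
# this collection with eval() on the raw text. A minimal round-trip sketch,
# assuming the DARTS-style Genotype namedtuple from genotypes.py is in scope
# so that eval() can reconstruct it:
from genotypes import Genotype  # noqa: F401  (needed in scope for eval below)

with open(os.path.join(args.save, 'genotype.txt')) as f:
    restored_genotype = eval(f.read())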
def main():
    if is_wandb_used:
        wandb.init(project="automl-gradient-based-nas",
                   name="r" + str(args.run_id) + "-e" + str(args.epochs) +
                        "-lr" + str(args.learning_rate) +
                        "-l(" + str(args.lambda_train_regularizer) + "," +
                        str(args.lambda_valid_regularizer) + ")",
                   config=args,
                   entity="automl")

    global is_multi_gpu

    gpus = [int(i) for i in args.gpu.split(',')]
    logging.info('gpus = %s' % gpus)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # default: args.init_channels = 16, CIFAR_CLASSES = 10, args.layers = 8
    if args.arch_search_method == "DARTS":
        model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    elif args.arch_search_method == "GDAS":
        model = Network_GumbelSoftmax(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    else:
        model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)

    if len(gpus) > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
        is_multi_gpu = True

    model.cuda()
    if args.model_path != "saved_models":
        utils.load(model, args.model_path)

    arch_parameters = model.module.arch_parameters() if is_multi_gpu else model.arch_parameters()
    arch_params = list(map(id, arch_parameters))
    parameters = model.module.parameters() if is_multi_gpu else model.parameters()
    weight_params = filter(lambda p: id(p) not in arch_params, parameters)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        weight_params,  # model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # will cost time to download the data
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # split index

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size * len(gpus),
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size * len(gpus),
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    best_accuracy = 0
    best_accuracy_different_cnn_counts = dict()

    if is_wandb_used:
        table = wandb.Table(columns=["Epoch", "Searched Architecture"])

    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        train_acc, train_obj, train_loss = train(epoch, train_queue, valid_queue, model,
                                                 architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)
        if is_wandb_used:
            wandb.log({"searching_train_acc": train_acc, "epoch": epoch})
            wandb.log({"searching_train_loss": train_loss, "epoch": epoch})

        # validation
        with torch.no_grad():
            valid_acc, valid_obj, valid_loss = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        scheduler.step()
        if is_wandb_used:
            wandb.log({"searching_valid_acc": valid_acc, "epoch": epoch})
            wandb.log({"searching_valid_loss": valid_loss, "epoch": epoch})
            wandb.log({"search_train_valid_acc_gap": train_acc - valid_acc, "epoch": epoch})
            wandb.log({"search_train_valid_loss_gap": train_loss - valid_loss, "epoch": epoch})

        # save the structure
        genotype, normal_cnn_count, reduce_cnn_count = \
            model.module.genotype() if is_multi_gpu else model.genotype()

        # early stopping
        if args.early_stopping == 1:
            if normal_cnn_count == 6 and reduce_cnn_count == 0:
                break

        print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count))
        print(F.softmax(model.module.alphas_normal if is_multi_gpu else model.alphas_normal, dim=-1))
        print(F.softmax(model.module.alphas_reduce if is_multi_gpu else model.alphas_reduce, dim=-1))
        logging.info('genotype = %s', genotype)

        if is_wandb_used:
            wandb.log({"genotype": str(genotype)}, step=epoch - 1)
            table.add_data(str(epoch), str(genotype))
            wandb.log({"Searched Architecture": table})

            # save the cnn architecture according to the CNN count
            cnn_count = normal_cnn_count * 10 + reduce_cnn_count
            wandb.log({"searching_cnn_count(%s)" % cnn_count: valid_acc, "epoch": epoch})
            if cnn_count not in best_accuracy_different_cnn_counts.keys():
                best_accuracy_different_cnn_counts[cnn_count] = valid_acc
                summary_key_cnn_structure = "best_acc_for_cnn_structure(n:%d,r:%d)" % (
                    normal_cnn_count, reduce_cnn_count)
                wandb.run.summary[summary_key_cnn_structure] = valid_acc
                summary_key_best_cnn_structure = "epoch_of_best_acc_for_cnn_structure(n:%d,r:%d)" % (
                    normal_cnn_count, reduce_cnn_count)
                wandb.run.summary[summary_key_best_cnn_structure] = epoch
            else:
                if valid_acc > best_accuracy_different_cnn_counts[cnn_count]:
                    best_accuracy_different_cnn_counts[cnn_count] = valid_acc
                    summary_key_cnn_structure = "best_acc_for_cnn_structure(n:%d,r:%d)" % (
                        normal_cnn_count, reduce_cnn_count)
                    wandb.run.summary[summary_key_cnn_structure] = valid_acc
                    summary_key_best_cnn_structure = "epoch_of_best_acc_for_cnn_structure(n:%d,r:%d)" % (
                        normal_cnn_count, reduce_cnn_count)
                    wandb.run.summary[summary_key_best_cnn_structure] = epoch

            if valid_acc > best_accuracy:
                best_accuracy = valid_acc
                wandb.run.summary["best_valid_accuracy"] = valid_acc
                wandb.run.summary["epoch_of_best_accuracy"] = epoch
                utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    start_epoch = 0
    if args.resume:
        MT = torch.load(os.path.join(args.save, 'weight_optimizers.pt'))
        model.load_state_dict(MT['net'])
        optimizer.load_state_dict(MT['optimizer'])
        start_epoch = MT['epoch']

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data_dir, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                              shuffle=True, pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size,
                                              shuffle=False, pin_memory=True, num_workers=args.workers)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    best_acc = 0.0
    for epoch in range(start_epoch, args.epochs):
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        start_time = time.time()

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            state = {'net': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
            torch.save(state, os.path.join(args.save, 'best_weight_optimizers.pt'))
        logging.info('Valid_acc: %f, best_acc: %f', valid_acc, best_acc)

        end_time = time.time()
        duration = end_time - start_time
        print('Estimated remaining time: %d h.' % (duration * (args.epochs - epoch) / 3600))

        if epoch % 50 == 0:
            state = {'net': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
            torch.save(state, os.path.join(args.save, 'weight_optimizers.pt'))
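# Because the model is saved while wrapped in nn.DataParallel, every key in
# MT['net'] carries a 'module.' prefix, so the checkpoint above only loads into
# a model that is also wrapped. A small sketch of the standard PyTorch pattern
# (not part of this repo) for loading such a checkpoint into an unwrapped model:
state_dict = torch.load(os.path.join(args.save, 'weight_optimizers.pt'))['net']
unwrapped = {k[len('module.'):] if k.startswith('module.') else k: v
             for k, v in state_dict.items()}
# bare_model.load_state_dict(unwrapped)  # bare_model: the same Network without DataParallel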
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, k=args.k)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.dataset == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    architect = Architect(model, args)

    # configure progressive parameter
    epoch = 0
    ks = [6, 4]
    num_keeps = [7, 4]
    train_epochs = [2, 2] if 'debug' in args.save else [25, 25]
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)

    for i, current_epochs in enumerate(train_epochs):
        for e in range(current_epochs):
            lr = scheduler.get_lr()[0]
            logging.info('epoch %d lr %e', epoch, lr)

            genotype = model.genotype()
            logging.info('genotype = %s', genotype)
            model.show_arch_parameters()

            # training
            train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                         criterion, optimizer, lr, e)
            logging.info('train_acc %f', train_acc)

            # validation
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

            epoch += 1
            scheduler.step()
            utils.save(model, os.path.join(args.save, 'weights.pt'))

        if i != len(train_epochs) - 1:
            model.pruning(num_keeps[i + 1])
            # architect.pruning([model.mask_normal, model.mask_reduce])
            model.wider(ks[i + 1])
            optimizer = configure_optimizer(
                optimizer,
                torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay))
            scheduler = configure_scheduler(
                scheduler,
                torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min))
            logging.info('pruning finish, %d ops left per edge', num_keeps[i + 1])
            logging.info('network wider finish, current pc parameter %d', ks[i + 1])

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model.show_arch_parameters()
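# `configure_optimizer` / `configure_scheduler` are repo helpers that carry
# state over to the rebuilt optimizer and scheduler after pruning/widening.
# A hedged sketch of what they are assumed to do: copy SGD momentum buffers
# for parameters whose shapes still match, and the scheduler's epoch counter.
def configure_optimizer(optimizer_old, optimizer_new):
    for group_old, group_new in zip(optimizer_old.param_groups, optimizer_new.param_groups):
        for p_old, p_new in zip(group_old['params'], group_new['params']):
            state = optimizer_old.state.get(p_old, {})
            if 'momentum_buffer' in state and state['momentum_buffer'].shape == p_new.shape:
                optimizer_new.state[p_new]['momentum_buffer'] = state['momentum_buffer'].clone()
    return optimizer_new

def configure_scheduler(scheduler_old, scheduler_new):
    # resume the cosine schedule where the previous one left off
    scheduler_new.last_epoch = scheduler_old.last_epoch
    return scheduler_new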
def main():
    if args.load_checkpoint:
        args.save = Path(args.load_checkpoint) / 'eval-imagenet-{}-{}'.format(
            args.save, time.strftime("%Y%m%d-%H%M%S"))
    else:
        args.save = Path('logs') / 'eval-imagenet-{}-{}'.format(
            args.save, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.save / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    model = eval(args.model)
    # flops, params = profile(model, input_size=(1, 3, 224, 224))
    # print("flops" + str(flops) + " params" + str(params))

    if args.load_checkpoint:
        dictionary = torch.load(args.load_checkpoint)
        start_epoch = dictionary['epoch'] if args.start_epoch == -1 else args.start_epoch
        model.load_state_dict(dictionary['state_dict'])
    else:
        start_epoch = 0 if args.start_epoch == -1 else args.start_epoch

    direct_model = model
    if args.gpu:
        model = nn.DataParallel(model)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )
    # if args.load_checkpoint:
    #     optimizer.load_state_dict(dictionary['optimizer'])
    #     del dictionary

    traindir = os.path.join(args.data, 'train')
    validdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_data = dset.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
            transforms.ToTensor(),
            normalize,
        ]))
    valid_data = dset.ImageFolder(
        validdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=args.num_workers)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=args.num_workers)

    if args.eval:
        direct_model.drop_path_prob = 0
        valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, args.gpu)
        logging.info('valid_acc_top1 %f', valid_acc_top1)
        logging.info('valid_acc_top5 %f', valid_acc_top5)
        return

    if args.period is not None:
        periods = [int(p) for p in args.period.split(',')]
        totals = []
        total = 0
        for p in periods:
            total += p
            totals.append(total)
        scheduler = CosineAnnealingLR(optimizer, periods[0])
    else:
        periods = None
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)

    best_acc_top1 = 0
    for epoch in range(start_epoch, args.epochs):
        if args.period is None:
            scheduler.step(epoch)
        else:
            assert len(periods) > 0
            index = bisect.bisect_left(totals, epoch)
            scheduler.T_max = periods[index]
            if index == 0:
                e = epoch
            else:
                e = epoch - totals[index - 1]
            scheduler.step(e % periods[index])
            logging.info("schedule epoch:" + str(e % periods[index]))
            logging.info("schedule period:" + str(periods[index]))
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])

        direct_model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, optimizer, args.gpu)
        logging.info('train_acc %f', train_acc)

        valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, args.gpu)
        logging.info('valid_acc_top1 %f', valid_acc_top1)
        logging.info('valid_acc_top5 %f', valid_acc_top5)

        is_best = False
        if valid_acc_top1 > best_acc_top1:
            best_acc_top1 = valid_acc_top1
            is_best = True

        utils.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.module.state_dict(),
            'best_acc_top1': best_acc_top1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.save)
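# `utils.save_checkpoint` follows the usual DARTS/NAO convention: always write
# the latest checkpoint, then copy it to a best-model file when is_best is set.
# A sketch of that convention (the file names are assumptions):
import shutil

def save_checkpoint(state, is_best, save_dir):
    filename = os.path.join(save_dir, 'checkpoint.pth.tar')
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(save_dir, 'model_best.pth.tar'))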
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    if TORCH_VERSION.startswith('1'):
        torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # load search configuration file holding the found architectures
    configuration = '_'.join([args.space, args.dataset])
    settings = '_'.join([str(args.search_dp), str(args.search_wd)])
    with open(args.archs_config_file, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.FullLoader)
        arch = dict(cfg)[configuration][settings][args.search_task_id]
    print(arch)
    genotype = eval(arch)

    model = Network(args.init_channels, args.n_classes, args.layers, args.auxiliary, genotype)
    if TORCH_VERSION.startswith('1'):
        model = model.to(device)
    else:
        model = model.cuda()
    if args.model_path is not None:
        utils.load(model, args.model_path, genotype)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    if TORCH_VERSION.startswith('1'):
        criterion = criterion.to(device)
    else:
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = CosineAnnealingLR(optimizer, float(args.epochs))

    train_queue, valid_queue, _, _ = helper.get_train_val_loaders()

    errors_dict = {
        'train_acc': [],
        'train_loss': [],
        'valid_acc': [],
        'valid_loss': []
    }

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        # training
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        # evaluation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # update the errors dictionary
        errors_dict['train_acc'].append(100 - train_acc)
        errors_dict['train_loss'].append(train_obj)
        errors_dict['valid_acc'].append(100 - valid_acc)
        errors_dict['valid_loss'].append(valid_obj)

    with codecs.open(os.path.join(args.save, 'errors_{}_{}.json'.format(args.search_task_id, args.task_id)),
                     'w', encoding='utf-8') as file:
        json.dump(errors_dict, file, separators=(',', ':'))

    utils.write_yaml_results_eval(args, args.results_test, 100 - valid_acc)
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    torch.cuda.empty_cache()
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype_path = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genotype.txt')
    print(genotype_path)
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
        genotype = eval(geno_raw)
    else:
        genoname = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], 'genoname.txt')
        if os.path.isfile(genoname):
            with open(genoname, "r") as f:
                args.arch = f.read()
            genotype = eval("genotypes.%s" % args.arch)
        else:
            genotype = eval("genotypes.ADMM")

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    utils.load(model, os.path.join(utils.get_dir(), args.model_path))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    datapath = os.path.join(utils.get_dir(), args.data)
    if args.task == "CIFAR100cf":
        _, test_transform = utils._data_transforms_cifar100(args)
        test_data = utils.CIFAR100C2F(root=datapath, train=False, download=True, transform=test_transform)
        test_indices = test_data.filter_by_fine(args.test_filter)
        test_queue = torch.utils.data.DataLoader(
            torch.utils.data.Subset(test_data, test_indices),
            batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter
    elif args.task == "CIFAR100":
        _, test_transform = utils._data_transforms_cifar100(args)
        test_data = dset.CIFAR100(root=datapath, train=False, download=True, transform=test_transform)
        test_queue = torch.utils.data.DataLoader(
            test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
    else:
        _, test_transform = utils._data_transforms_cifar10(args)
        test_data = dset.CIFAR10(root=datapath, train=False, download=True, transform=test_transform)
        test_queue = torch.utils.data.DataLoader(
            test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
def compute(self, config, budget, *args, **kwargs):
    """
    Get model with hyperparameters from config generated by get_configspace()
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    logging.info(f'Running config for {budget} epochs')

    gpu = 'cuda:0'
    np.random.seed(self.seed)
    torch.cuda.set_device(gpu)
    cudnn.benchmark = True
    torch.manual_seed(self.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(self.seed)
    logging.info('gpu device = %s' % gpu)
    logging.info("config = %s", config)

    ensemble_model = EnsembleModel(self.trained_models,
                                   dense_units=config['dense_units'],
                                   out_size=self.train_dataset.n_classes)
    ensemble_model = ensemble_model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(ensemble_model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(ensemble_model.parameters(),
                                    lr=config['initial_lr'],
                                    momentum=config['sgd_momentum'],
                                    weight_decay=config['weight_decay'],
                                    nesterov=config['nesterov'])
    else:
        optimizer = get('opti_dict')[config['optimizer']](
            ensemble_model.parameters(),
            lr=config['initial_lr'],
            weight_decay=config['weight_decay'])

    if config['lr_scheduler'] == 'Cosine':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, int(budget))
    elif config['lr_scheduler'] == 'Exponential':
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    # draw (with replacement) train indices from the first two thirds of the
    # dataset and validation indices from the last third
    indices = list(np.random.randint(0, 2 * len(self.train_dataset) // 3,
                                     size=len(self.train_dataset) // 3))
    valid_indices = list(np.random.randint(2 * len(self.train_dataset) // 3,
                                           len(self.train_dataset),
                                           size=len(self.train_dataset) // 6))
    print("Training size=", len(indices))
    training_sampler = SubsetRandomSampler(indices)
    valid_sampler = SubsetRandomSampler(valid_indices)
    train_queue = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                              batch_size=self.batch_size,
                                              sampler=training_sampler)
    valid_queue = torch.utils.data.DataLoader(dataset=self.train_dataset,
                                              batch_size=self.batch_size,
                                              sampler=valid_sampler)

    for epoch in range(int(budget)):
        logging.info('epoch %d lr %e', epoch, lr_scheduler.get_lr()[0])
        ensemble_model.drop_path_prob = config['drop_path_prob'] * epoch / int(budget)

        train_acc, train_obj = ensemble_train(train_queue, ensemble_model, criterion,
                                              optimizer, grad_clip=config['grad_clip_value'])
        logging.info('train_acc %f', train_acc)
        lr_scheduler.step()

        valid_acc, valid_obj = infer(valid_queue, ensemble_model, criterion)
        logging.info('valid_acc %f', valid_acc)

    return ({
        'loss': valid_obj,  # Hyperband always minimizes, so we want to minimise the error, error = 1 - accuracy
        'info': {}  # mandatory - can be used in the future to give more information
    })
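# Hedged usage sketch: a hpbandster-style master calls compute() once per
# sampled configuration and budget, and ranks configurations by the returned
# 'loss'. The worker instance and the config values below are illustrative,
# not taken from this repo.
result = worker.compute(config={'dense_units': 128, 'optimizer': 'sgd',
                                'initial_lr': 0.025, 'sgd_momentum': 0.9,
                                'weight_decay': 3e-4, 'nesterov': True,
                                'lr_scheduler': 'Cosine',
                                'drop_path_prob': 0.2, 'grad_clip_value': 5},
                        budget=10)
print(result['loss'])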
def main():
    # Select the search space to search in
    if args.search_space == '1':
        search_space = SearchSpace1()
    elif args.search_space == '2':
        search_space = SearchSpace2()
    elif args.search_space == '3':
        search_space = SearchSpace3()
    else:
        raise ValueError('Unknown search space')

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.num_linear_layers, args.init_channels, CIFAR_CLASSES,
                    args.layers, criterion,
                    output_weights=args.output_weights,
                    steps=search_space.num_intermediate_nodes,
                    search_space=search_space)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    # Read a random sample of architectures
    archs = pickle.load(open(
        '/home/siemsj/projects/darts_weight_sharing_analysis/nasbench_analysis/architecture_inductive_bias/sampled_architectures_from_search_space_3.obj',
        'rb'))
    arch = archs[args.arch_idx]
    arch_parameters = get_weights_from_arch(arch, model)
    model._arch_parameters = arch_parameters

    try:
        for epoch in range(args.epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            # increase the cutout probability linearly throughout search
            train_transform.transforms[-1].cutout_prob = args.cutout_prob * epoch / (args.epochs - 1)
            logging.info('epoch %d lr %e cutout_prob %e', epoch, lr,
                         train_transform.transforms[-1].cutout_prob)

            # Save the one shot model architecture weights for later analysis
            arch_filename = os.path.join(args.save, 'one_shot_architecture_{}.obj'.format(epoch))
            with open(arch_filename, 'wb') as filehandler:
                numpy_tensor_list = []
                for tensor in model.arch_parameters():
                    numpy_tensor_list.append(tensor.detach().cpu().numpy())
                pickle.dump(numpy_tensor_list, filehandler)

            # Save the entire one-shot-model
            # filepath = os.path.join(args.save, 'one_shot_model_{}.obj'.format(epoch))
            # torch.save(model.state_dict(), filepath)

            logging.info('architecture %s', numpy_tensor_list)

            # training
            train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                         criterion, optimizer, lr, epoch)
            logging.info('train_acc %f', train_acc)

            # validation
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

            utils.save(model, os.path.join(args.save, 'weights.pt'))

        logging.info('STARTING EVALUATION')
        test, valid, runtime, params = naseval.eval_one_shot_model(
            config=args.__dict__, model=arch_filename)
        index = np.random.choice(list(range(3)))
        logging.info('TEST ERROR: %.3f | VALID ERROR: %.3f | RUNTIME: %f | PARAMS: %d' %
                     (test[index], valid[index], runtime[index], params[index]))
    except Exception as e:
        logging.exception('message')
teacher_train_config = copy.deepcopy(train_config)
teacher_name = '{}_{}_best.pth.tar'.format(args.teacher, trial_id)
teacher_train_config['Student_name'] = args.teacher
teacher_trainer = TrainManager(teacher_model,
                               TA=None,
                               teacher=None,
                               train_loader=train_loader,
                               test_loader=test_loader,
                               train_config=teacher_train_config)
best_teacher_acc, process_form = teacher_trainer.train()
teacher_model = load_checkpoint(teacher_model, os.path.join('./', teacher_name))

# Student training
print("Teacher param size = %fMB" % utils.count_parameters_in_MB(teacher_model))
print("Teaching assistant param size = %fMB" % utils.count_parameters_in_MB(TA_model))
print("Student param size = %fMB" % utils.count_parameters_in_MB(student_model))

print("---------- Training Teaching Assistant -------")
TA_train_config = copy.deepcopy(train_config)
train_loader, test_loader = get_cifar(num_classes, batch_size=args.batch_size)
TA_train_config['TA_name'] = args.TA
TA_train_config['Student_name'] = args.student
TA_trainer = TrainManager(TA_model,
                          student_model,
                          teacher=teacher_model,
                          train_loader=train_loader,
                          test_loader=test_loader,
                          train_config=TA_train_config)
def main(primitives): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model_init = Network(args.init_channels, args.n_classes, args.layers, criterion, primitives, steps=args.nodes) model_init = model_init.cuda() #logging.info("param size = %fMB", utils.count_parameters_in_MB(model_init)) optimizer_init = torch.optim.SGD(model_init.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) architect_init = Architect(model_init, args) scheduler_init = CosineAnnealingLR(optimizer_init, float(args.epochs), eta_min=args.learning_rate_min) analyser_init = Analyzer(args, model_init) la_tracker = utils.EVLocalAvg(args.window, args.report_freq_hessian, args.epochs) if args.resume: if os.path.isfile(args.resume_file): print("=> loading checkpoint '{}'".format(args.resume_file)) checkpoint = torch.load(args.resume_file) start_epoch = 27 print('start_epoch', start_epoch) model_init.load_state_dict(checkpoint['state_dict']) model_init.alphas_normal.data = checkpoint['alphas_normal'] model_init.alphas_reduce.data = checkpoint['alphas_reduce'] model_init = model_init.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model_init)) optimizer_init.load_state_dict(checkpoint['optimizer']) architect_init.optimizer.load_state_dict( checkpoint['arch_optimizer']) scheduler_init = CosineAnnealingLR(optimizer_init, float(args.epochs), eta_min=args.learning_rate_min) analyser_init = Analyzer(args, model_init) la_tracker = utils.EVLocalAvg(args.window, args.report_freq_hessian, args.epochs) la_tracker.ev = checkpoint['ev'] la_tracker.ev_local_avg = checkpoint['ev_local_avg'] la_tracker.genotypes = checkpoint['genotypes'] la_tracker.la_epochs = checkpoint['la_epochs'] la_tracker.la_start_idx = checkpoint['la_start_idx'] la_tracker.la_end_idx = checkpoint['la_end_idx'] lr = checkpoint['lr'] train_queue, valid_queue, train_transform, valid_transform = helper.get_train_val_loaders( ) errors_dict = { 'train_acc': [], 'train_loss': [], 'valid_acc': [], 'valid_loss': [] } #for epoch in range(args.epochs): def train_epochs(epochs_to_train, iteration, args=args, model=model_init, optimizer=optimizer_init, scheduler=scheduler_init, train_queue=train_queue, valid_queue=valid_queue, train_transform=train_transform, valid_transform=valid_transform, architect=architect_init, criterion=criterion, primitives=primitives, analyser=analyser_init, la_tracker=la_tracker, errors_dict=errors_dict, start_epoch=-1): logging.info('STARTING ITERATION: %d', iteration) logging.info('EPOCHS TO TRAIN: %d', epochs_to_train - start_epoch - 1) la_tracker.stop_search = False if epochs_to_train - start_epoch - 1 <= 0: return model.genotype(), -1 for epoch in range(start_epoch + 1, epochs_to_train): # set the epoch to the right one #epoch += args.epochs - epochs_to_train scheduler.step(epoch) lr = scheduler.get_lr()[0] if args.drop_path_prob != 0: model.drop_path_prob = args.drop_path_prob * epoch / ( args.epochs - 1) train_transform.transforms[ -1].cutout_prob = args.cutout_prob * epoch / (args.epochs - 1) logging.info('epoch %d lr %e drop_prob %e cutout_prob %e', epoch, lr, model.drop_path_prob, train_transform.transforms[-1].cutout_prob) else: logging.info('epoch %d lr %e', epoch, lr) # training 
train_acc, train_obj = train(epoch, primitives, train_queue, valid_queue, model, architect, criterion, optimizer, lr, analyser, la_tracker, iteration) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) # update the errors dictionary errors_dict['train_acc'].append(100 - train_acc) errors_dict['train_loss'].append(train_obj) errors_dict['valid_acc'].append(100 - valid_acc) errors_dict['valid_loss'].append(valid_obj) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) state = { 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'alphas_normal': model.alphas_normal.data, 'alphas_reduce': model.alphas_reduce.data, 'arch_optimizer': architect.optimizer.state_dict(), 'lr': lr, 'ev': la_tracker.ev, 'ev_local_avg': la_tracker.ev_local_avg, 'genotypes': la_tracker.genotypes, 'la_epochs': la_tracker.la_epochs, 'la_start_idx': la_tracker.la_start_idx, 'la_end_idx': la_tracker.la_end_idx, #'scheduler': scheduler.state_dict(), } utils.save_checkpoint(state, False, args.save, epoch, args.task_id) if not args.compute_hessian: ev = -1 else: ev = la_tracker.ev[-1] params = { 'iteration': iteration, 'epoch': epoch, 'wd': args.weight_decay, 'ev': ev, } schedule_of_params.append(params) # limit the number of iterations based on the maximum regularization # value predefined by the user final_iteration = round( np.log(args.max_weight_decay) / np.log(args.weight_decay), 1) == 1. ## stop once the lr decay has reached a certain level if la_tracker.stop_search and not final_iteration: if args.early_stop == 1: # set the following to the values they had at stop_epoch errors_dict['valid_acc'] = errors_dict[ 'valid_acc'][:la_tracker.stop_epoch + 1] genotype = la_tracker.stop_genotype valid_acc = 100 - errors_dict['valid_acc'][ la_tracker.stop_epoch] logging.info( 'Decided to stop the search at epoch %d (Current epoch: %d)', la_tracker.stop_epoch, epoch) logging.info('Validation accuracy at stop epoch: %f', valid_acc) logging.info('Genotype at stop epoch: %s', genotype) break elif args.early_stop == 2: # simulate early stopping and continue search afterwards simulated_errors_dict = errors_dict[ 'valid_acc'][:la_tracker.stop_epoch + 1] simulated_genotype = la_tracker.stop_genotype simulated_valid_acc = 100 - simulated_errors_dict[ la_tracker.stop_epoch] logging.info( '(SIM) Decided to stop the search at epoch %d (Current epoch: %d)', la_tracker.stop_epoch, epoch) logging.info('(SIM) Validation accuracy at stop epoch: %f', simulated_valid_acc) logging.info('(SIM) Genotype at stop epoch: %s', simulated_genotype) with open( os.path.join(args.save, 'arch_early_{}'.format(args.task_id)), 'w') as file: file.write(str(simulated_genotype)) utils.write_yaml_results(args, 'early_' + args.results_file_arch, str(simulated_genotype)) utils.write_yaml_results(args, 'early_stop_epochs', la_tracker.stop_epoch) args.early_stop = 0 elif args.early_stop == 3: # adjust regularization simulated_errors_dict = errors_dict[ 'valid_acc'][:la_tracker.stop_epoch + 1] simulated_genotype = la_tracker.stop_genotype simulated_valid_acc = 100 - simulated_errors_dict[ la_tracker.stop_epoch] stop_epoch = la_tracker.stop_epoch start_again_epoch = stop_epoch - args.extra_rollback_epochs logging.info( '(ADA) Decided to increase regularization at epoch %d (Current epoch: %d)', stop_epoch, epoch) logging.info('(ADA) Rolling back to epoch %d', start_again_epoch)
logging.info( '(ADA) Restoring model parameters and continuing for %d epochs', epochs_to_train - start_again_epoch - 1) if iteration == 1: logging.info( '(ADA) Saving the architecture at the early stop epoch and ' 'continuing with the adaptive regularization strategy' ) utils.write_yaml_results( args, 'early_' + args.results_file_arch, str(simulated_genotype)) del model del architect del optimizer del scheduler del analyser model_new = Network(args.init_channels, args.n_classes, args.layers, criterion, primitives, steps=args.nodes) model_new = model_new.cuda() optimizer_new = torch.optim.SGD( model_new.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) architect_new = Architect(model_new, args) analyser_new = Analyzer(args, model_new) la_tracker = utils.EVLocalAvg(args.window, args.report_freq_hessian, args.epochs) lr = utils.load_checkpoint(model_new, optimizer_new, None, architect_new, args.save, la_tracker, start_again_epoch, args.task_id) args.weight_decay *= args.mul_factor for param_group in optimizer_new.param_groups: param_group['weight_decay'] = args.weight_decay scheduler_new = CosineAnnealingLR( optimizer_new, float(args.epochs), eta_min=args.learning_rate_min) logging.info('(ADA) Validation accuracy at stop epoch: %f', simulated_valid_acc) logging.info('(ADA) Genotype at stop epoch: %s', simulated_genotype) logging.info( '(ADA) Adjusting L2 regularization to the new value: %f', args.weight_decay) genotype, valid_acc = train_epochs(args.epochs, iteration + 1, model=model_new, optimizer=optimizer_new, architect=architect_new, scheduler=scheduler_new, analyser=analyser_new, start_epoch=start_again_epoch) args.early_stop = 0 break return genotype, valid_acc # call train_epochs recursively genotype, valid_acc = train_epochs(args.epochs, 1) with codecs.open(os.path.join(args.save, 'errors_{}.json'.format(args.task_id)), 'w', encoding='utf-8') as file: json.dump(errors_dict, file, separators=(',', ':')) with open(os.path.join(args.save, 'arch_{}'.format(args.task_id)), 'w') as file: file.write(str(genotype)) utils.write_yaml_results(args, args.results_file_arch, str(genotype)) utils.write_yaml_results(args, args.results_file_perf, 100 - valid_acc) with open( os.path.join(args.save, 'schedule_{}.pickle'.format(args.task_id)), 'ab') as file: pickle.dump(schedule_of_params, file, pickle.HIGHEST_PROTOCOL)
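# The rollback logic above is driven by utils.EVLocalAvg, which tracks the
# dominant eigenvalue of the architecture Hessian and smooths it over a
# window of epochs before deciding to stop or raise regularization. A hedged
# sketch of such a stopping rule (the growth factor 1.3 and the window
# arithmetic are illustrative assumptions; the exact rule lives in
# utils.EVLocalAvg):
def should_stop(ev_local_avg, window, factor=1.3):
    # stop once the smoothed eigenvalue has grown by `factor` relative to
    # its value `window` epochs earlier
    if len(ev_local_avg) <= window:
        return False
    return ev_local_avg[-1] > factor * ev_local_avg[-1 - window]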
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.rho, args.ewma) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) datapath = os.path.join(utils.get_dir(), args.data) train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, int(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) model.initialize_Z_and_U() loggers = {"train": {"loss": [], "acc": [], "step": []}, "val": {"loss": [], "acc": [], "step": []}, "infer": {"loss": [], "acc": [], "step": []}, "ath": {"threshold": [], "step": []}, "zuth": {"threshold": [], "step": []}, "astep": [], "zustep": []} if args.constant_alpha_threshold < 0: alpha_threshold = args.init_alpha_threshold else: alpha_threshold = args.constant_alpha_threshold zu_threshold = args.init_zu_threshold alpha_counter = 0 ewma = -1 for epoch in range(args.epochs): valid_iter = iter(valid_queue) model.clear_U() scheduler.step() lr = scheduler.get_last_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(torch.clamp(model.alphas_normal, min=0.1, max=1.0)) print(torch.clamp(model.alphas_reduce, min=0.1, max=1.0)) # training train_acc, train_obj, alpha_threshold, zu_threshold, alpha_counter, ewma = train(train_queue, valid_iter, model, architect, criterion, optimizer, lr, loggers, alpha_threshold, zu_threshold, alpha_counter, ewma, args) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock) logging.info('valid_acc %f', valid_acc) utils.plot_loss_acc(loggers, args.save) # model.update_history() utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normalalpha'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reducealpha'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.FI_normal_history, path=os.path.join(args.save, 'normalFI'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.FI_reduce_history, path=os.path.join(args.save, 'reduceFI'), steps=loggers["train"]["step"]) scaled_FI_normal = scale(model.FI_normal_history, model.alphas_normal_history) scaled_FI_reduce = scale(model.FI_reduce_history, model.alphas_reduce_history) utils.save_file(recoder=scaled_FI_normal, path=os.path.join(args.save, 'normalFIscaled'), steps=loggers["train"]["step"]) utils.save_file(recoder=scaled_FI_reduce, path=os.path.join(args.save, 'reduceFIscaled'), steps=loggers["train"]["step"]) utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save, "FI", loggers["ath"], loggers['astep']) utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history, args.save, "FI_ewma", loggers["ath"], loggers['astep']) utils.plot_FI(model.FI_alpha_history_step, model.FI_alpha_history, args.save, "FI_alpha", loggers["zuth"], loggers['zustep']) utils.save(model, os.path.join(args.save, 'weights.pt')) genotype = model.genotype() logging.info('genotype = %s', genotype) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close()
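# The training loop above threads an `ewma` value through train() to smooth
# the Fisher-information signal, with -1 serving as a "not yet set"
# sentinel. A minimal sketch of such an update (the smoothing constant 0.9
# is an illustrative assumption, not this repository's value):
def update_ewma(prev, value, beta=0.9):
    # the first observation replaces the sentinel; afterwards blend
    # exponentially toward the new value
    return value if prev < 0 else beta * prev + (1.0 - beta) * value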
def main(primitives): if not torch.cuda.is_available() or args.disable_cuda: logging.info('no gpu device available or disabling cuda') np.random.seed(args.seed) torch.manual_seed(args.seed) if not args.disable_cuda: torch.cuda.set_device(args.gpu) logging.info('gpu device = %d' % args.gpu) cudnn.benchmark = True cudnn.enabled = True torch.cuda.manual_seed(args.seed) criterion = nn.CrossEntropyLoss() if not args.disable_cuda: criterion = criterion.cuda() model_init = Network(args.init_channels, args.n_classes, args.layers, criterion, primitives, steps=args.nodes, args=args) if not args.disable_cuda: model_init = model_init.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model_init)) optimizer_init = torch.optim.SGD(model_init.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) architect_init = Architect(model_init, args) scheduler_init = CosineAnnealingLR(optimizer_init, float(args.epochs), eta_min=args.learning_rate_min) analyser = Analyzer(args, model_init) la_tracker = utils.EVLocalAvg(args.window, args.report_freq_hessian, args.epochs) train_queue, valid_queue, train_transform, valid_transform = helper.get_train_val_loaders() def valid_generator(): while True: for x, t in valid_queue: yield x, t valid_gen = valid_generator() for epoch in range(args.ev_start_epoch - 1, args.epochs): beta_decay_scheduler.step(epoch) logging.info("EPOCH %d SKIP BETA DECAY RATE: %e", epoch, beta_decay_scheduler.decay_rate) if (epoch % args.report_freq_hessian == 0) or (epoch == (args.epochs - 1)): lr = utils.load_checkpoint(model_init, optimizer_init, None, architect_init, args.save, la_tracker, epoch, args.task_id) logging.info("Loaded %d-th checkpoint." % epoch) if args.test_infer: valid_acc, valid_obj = infer(valid_queue, model_init, criterion) logging.info('valid_acc %f', valid_acc) if args.compute_hessian: input, target = next(iter(train_queue)) input = Variable(input, requires_grad=False) target = Variable(target, requires_grad=False) input_search, target_search = next( valid_gen) #next(iter(valid_queue)) input_search = Variable(input_search, requires_grad=False) target_search = Variable(target_search, requires_grad=False) if not args.disable_cuda: input = input.cuda() target = target.cuda(non_blocking=True) input_search = input_search.cuda() target_search = target_search.cuda(non_blocking=True) if not args.debug: H = analyser.compute_Hw(input, target, input_search, target_search, lr, optimizer_init, False) g = analyser.compute_dw(input, target, input_search, target_search, lr, optimizer_init, False) g = torch.cat([x.view(-1) for x in g]) state = { 'epoch': epoch, 'H': H.cpu().data.numpy().tolist(), 'g': g.cpu().data.numpy().tolist(), #'g_train': float(grad_norm), #'eig_train': eigenvalue, } with codecs.open(os.path.join( args.save, 'derivatives_{}.json'.format(args.task_id)), 'a', encoding='utf-8') as file: json.dump(state, file, separators=(',', ':')) file.write('\n') # early stopping ev = max(LA.eigvals(H.cpu().data.numpy())) logging.info('CURRENT EV: %f', ev)
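# Above, the dominant eigenvalue is taken from a full dense
# eigendecomposition of the architecture Hessian. For larger alpha
# dimensions a matrix-free power iteration is a common, cheaper alternative;
# a sketch under the assumption that H is available as a square torch
# tensor (this is not the repository's analyser code):
import torch

def dominant_eigenvalue(H, iters=100):
    v = torch.randn(H.shape[0], device=H.device)
    v = v / v.norm()
    for _ in range(iters):
        v = H @ v
        v = v / v.norm()
    # the Rayleigh quotient of the converged vector approximates the top eigenvalue
    return torch.dot(v, H @ v).item()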
def main(): wandb.init( project="automl-gradient-based-nas", name="hw" + str(args.arch), config=args, entity="automl" ) wandb.config.update(args) # adds all of the arguments as config variables global is_multi_gpu if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) gpus = [int(i) for i in args.gpu.split(',')] logging.info('gpus = %s' % gpus) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %s' % args.gpu) logging.info("args = %s", args) genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) if len(gpus) > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs model = nn.DataParallel(model) is_multi_gpu = True model.cuda() weight_params = model.module.parameters() if is_multi_gpu else model.parameters() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) wandb.run.summary["param_size"] = utils.count_parameters_in_MB(model) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD( weight_params, # model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay ) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs)) best_accuracy = 0 for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) wandb.log({"evaluation_train_acc": train_acc, 'epoch': epoch}) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) wandb.log({"evaluation_valid_acc": valid_acc, 'epoch': epoch}) if valid_acc > best_accuracy: wandb.run.summary["best_valid_accuracy"] = valid_acc wandb.run.summary["epoch_of_best_accuracy"] = epoch best_accuracy = valid_acc utils.save(model, os.path.join(wandb.run.dir, 'weights-best.pt')) utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
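# model.drop_path_prob above is annealed linearly from 0 to
# args.drop_path_prob over training. For reference, the drop_path op it
# controls, in the form used by standard DARTS-style repositories (this
# excerpt does not show this project's own utils, so treat it as the
# conventional definition rather than the exact code):
import torch

def drop_path(x, drop_prob):
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        # one Bernoulli mask per example; rescale kept paths so the
        # expected activation magnitude is preserved
        mask = torch.bernoulli(x.new_full((x.size(0), 1, 1, 1), keep_prob))
        x = x.div(keep_prob).mul(mask)
    return x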
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) if is_wandb_used: wandb.init( project="automl-gradient-based-nas", name="ImageNet:" + str(args.arch), config=args, entity="automl" ) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary, genotype) if args.parallel: model = nn.DataParallel(model).cuda() else: model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth) criterion_smooth = criterion_smooth.cuda() optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay ) traindir = os.path.join(args.data, 'train') validdir = os.path.join(args.data, 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_data = dset.ImageFolder( traindir, transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ColorJitter( brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2), transforms.ToTensor(), normalize, ])) valid_data = dset.ImageFolder( validdir, transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize, ])) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=4) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=4) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma) best_acc_top1 = 0 for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion_smooth, optimizer) logging.info('train_acc %f', train_acc) valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc_top1 %f', valid_acc_top1) logging.info('valid_acc_top5 %f', valid_acc_top5) is_best = False if valid_acc_top1 > best_acc_top1: best_acc_top1 = valid_acc_top1 is_best = True utils.save_checkpoint({ 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_acc_top1': best_acc_top1, 'optimizer': optimizer.state_dict(), }, is_best, args.save)
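# CrossEntropyLabelSmooth above mixes the one-hot target with a uniform
# distribution over classes. For reference, the usual definition shipped
# with DARTS-style ImageNet training, reproduced here as a sketch (the
# project's own class may differ in detail; epsilon corresponds to
# args.label_smooth):
import torch
import torch.nn as nn

class CrossEntropyLabelSmooth(nn.Module):
    def __init__(self, num_classes, epsilon):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        # one-hot targets, then blend with the uniform distribution
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        return (-targets * log_probs).mean(0).sum()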
def main(): # Select the search space to search in if args.search_space == '1': search_space = SearchSpace1() elif args.search_space == '2': search_space = SearchSpace2() elif args.search_space == '3': search_space = SearchSpace3() else: raise ValueError('Unknown search space') if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, output_weights=args.output_weights, steps=search_space.num_intermediate_nodes, search_space=search_space) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # Save the one shot model architecture weights for later analysis with open(os.path.join(args.save, 'one_shot_architecture_{}.obj'.format(epoch)), 'wb') as filehandler: numpy_tensor_list = [] for tensor in model.arch_parameters(): numpy_tensor_list.append(tensor.detach().cpu().numpy()) pickle.dump(numpy_tensor_list, filehandler) # Save the entire one-shot-model filepath = os.path.join(args.save, 'one_shot_model_{}.obj'.format(epoch)) torch.save(model.state_dict(), filepath) logging.info('architecture %s', numpy_tensor_list) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
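# The per-epoch pickles written above can be reloaded later to analyse how
# the architecture weights evolve; a short usage sketch (the helper name is
# ours, and `epoch` is whichever checkpoint you want to inspect):
import os
import pickle

def load_arch_weights(save_dir, epoch):
    path = os.path.join(save_dir, 'one_shot_architecture_{}.obj'.format(epoch))
    with open(path, 'rb') as fh:
        return pickle.load(fh)  # list of numpy arrays, one per alpha tensor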
def main(): start = time.time() if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) torch.cuda.set_device(config.local_rank % len(config.gpus)) torch.distributed.init_process_group(backend='nccl', init_method='env://') config.world_size = torch.distributed.get_world_size() config.total_batch = config.world_size * config.batch_size np.random.seed(config.seed) torch.manual_seed(config.seed) torch.cuda.manual_seed_all(config.seed) torch.backends.cudnn.benchmark = True CLASSES = 1000 channels = [32, 16, 24, 40, 80, 96, 192, 320, 1280] steps = [1, 1, 2, 3, 4, 3, 3, 1, 1] strides = [2, 1, 2, 2, 1, 2, 1, 1, 1] criterion = nn.CrossEntropyLoss() criterion_latency = LatencyLoss(channels[2:9], steps[2:8], strides[2:8]) criterion = criterion.cuda() criterion_latency = criterion_latency.cuda() model = Network(channels, steps, strides, CLASSES, criterion) model = model.cuda() #model = DDP(model, delay_allreduce=True) # use torch DDP rather than apex DDP: a custom loss cannot use model.parameters() on an apex-wrapped model (see https://github.com/NVIDIA/apex/issues/457) model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[config.local_rank], output_device=config.local_rank) logger.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), config.w_lr, momentum=config.w_momentum, weight_decay=config.w_weight_decay) train_data = get_imagenet_iter_torch( type='train', # image_dir="/googol/atlas/public/cv/ILSVRC/Data/" # use soft link `mkdir ./data/imagenet && ln -s /googol/atlas/public/cv/ILSVRC/Data/CLS-LOC/* ./data/imagenet/` image_dir=config.data_path + "/" + config.dataset.lower(), batch_size=config.batch_size, num_threads=config.workers, world_size=config.world_size, local_rank=config.local_rank, crop=224, device_id=config.local_rank, num_gpus=config.gpus, portion=config.train_portion) valid_data = get_imagenet_iter_torch( type='train', # image_dir="/googol/atlas/public/cv/ILSVRC/Data/" # use soft link `mkdir ./data/imagenet && ln -s /googol/atlas/public/cv/ILSVRC/Data/CLS-LOC/* ./data/imagenet/` image_dir=config.data_path + "/" + config.dataset.lower(), batch_size=config.batch_size, num_threads=config.workers, world_size=config.world_size, local_rank=config.local_rank, crop=224, device_id=config.local_rank, num_gpus=config.gpus, portion=config.val_portion) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(config.epochs), eta_min=config.w_lr_min) if len(config.gpus) > 1: architect = Architect(model.module, config) else: architect = Architect(model, config) best_top1 = 0.
for epoch in range(config.epochs): scheduler.step() lr = scheduler.get_lr()[0] logger.info('epoch %d lr %e', epoch, lr) #print(F.softmax(model.alphas_normal, dim=-1)) #print(F.softmax(model.alphas_reduce, dim=-1)) # training train_top1, train_loss = train(train_data, valid_data, model, architect, criterion, criterion_latency, optimizer, lr, epoch, writer) logger.info('Train top1 %f', train_top1) # validation top1 = 0 if config.epochs - epoch <= 1: top1, loss = infer(valid_data, model, epoch, criterion, writer) logger.info('valid top1 %f', top1) if len(config.gpus) > 1: genotype = model.module.genotype() else: genotype = model.genotype() logger.info("genotype = {}".format(genotype)) # plot the genotype as an image plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch + 1)) caption = "Epoch {}".format(epoch + 1) plot(genotype.normal, plot_path + "-normal", caption) plot(genotype.reduce, plot_path + "-reduce", caption) # save if best_top1 < top1: best_top1 = top1 best_genotype = genotype is_best = True else: is_best = False utils.save_checkpoint(model, config.path, is_best) print("") utils.time(time.time() - start) logger.info("Final best Prec@1 = {:.4%}".format(best_top1)) logger.info("Best Genotype = {}".format(best_genotype))
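# criterion_latency above scores the sampled ops against a latency lookup
# table. A hedged sketch of how such a term is commonly folded into the
# search objective (ProxylessNAS-style; the log form and the weight `lam`
# are illustrative assumptions, not this repository's exact formula):
import torch

def search_loss(ce_loss, expected_latency_ms, lam=0.1):
    # expected_latency_ms is assumed to be a scalar tensor produced by the
    # latency model; the log keeps the penalty's scale stable across very
    # different latency magnitudes
    return ce_loss + lam * torch.log(expected_latency_ms)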
def main(): # Select the search space to search in if args.search_space == '1': search_space = SearchSpace1() elif args.search_space == '2': search_space = SearchSpace2() elif args.search_space == '3': search_space = SearchSpace3() else: raise ValueError('Unknown search space') if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, output_weights=args.output_weights, steps=search_space.num_intermediate_nodes, search_space=search_space) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) nasbench = None for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) # Save the one shot model architecture weights for later analysis arch_filename = os.path.join( args.save, 'one_shot_architecture_{}.obj'.format(epoch)) with open(arch_filename, 'wb') as filehandler: numpy_tensor_list = [] for tensor in model.arch_parameters(): numpy_tensor_list.append(tensor.detach().cpu().numpy()) pickle.dump(numpy_tensor_list, filehandler) # Save the entire one-shot-model filepath = os.path.join(args.save, 'one_shot_model_{}.obj'.format(epoch)) torch.save(model.state_dict(), filepath) logging.info('architecture') logging.info(numpy_tensor_list) # training train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) logging.info('STARTING EVALUATION') if nasbench is None: nasbench = NasbenchWrapper( dataset_file='/nasbench_data/nasbench_only108.tfrecord') test, valid, runtime, params = naseval.eval_one_shot_model( config=args.__dict__, model=arch_filename, nasbench_results=nasbench) index = np.random.choice(list(range(3))) logging.info( 'TEST ERROR: %.3f | VALID ERROR: %.3f | RUNTIME: %f | PARAMS: %d' % (test[index], valid[index], runtime[index], params[index])) if args.s3_bucket is not None: for root, dirs, files in os.walk(args.save): for f in files: if 'one_shot_model' not in f: path = os.path.join(root, f) upload_to_s3(path, args.s3_bucket, path)
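# Note on `index = np.random.choice(list(range(3)))` above: NASBench-101
# stores three independent training repeats per architecture at the
# 108-epoch budget, so the logged TEST/VALID errors correspond to one
# randomly chosen repeat rather than an average over all three.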
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs)) for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
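# utils.save/utils.load in DARTS-derived repositories are thin wrappers over
# state_dict serialization; a sketch under that assumption (this matches the
# original DARTS utils, but the excerpt does not show this project's own
# definitions):
import torch

def save(model, model_path):
    torch.save(model.state_dict(), model_path)

def load(model, model_path):
    model.load_state_dict(torch.load(model_path))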