import os
import sys
import time
import logging

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torch.backends.cudnn as cudnn
import torchvision.datasets as dset

# Project-local names used below -- utils, Network, Architect, SearchControllerConf,
# ConvSeparateLoss, TriSeparateLoss, CIFAR_CLASSES, args, train, infer -- are assumed
# to be provided by the surrounding repository (parsed CLI args and the search/
# architect modules); their import paths are not shown in the source.


# Variant 1: architecture search for bathymetry regression
# (MSE loss, a single output, 4 input channels).
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.MSELoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, 1, args.layers, criterion, input_channels=4)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # dataset = utils.BathymetryDataset(args, "guyane/guyane.csv")
    # dataset.add(args, "saint_louis/saint_louis.csv")
    dataset = utils.BathymetryDataset(args, "../mixed_train.csv", to_filter=False)
    dataset.add(args, "../mixed_validation.csv", to_balance=False)
    trains, vals = dataset.get_subset_indices(args.train_portion)

    train_queue = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(trains),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(vals),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    loggers = {"train": {"loss": [], "step": []},
               "val": {"loss": [], "step": []},
               "infer": {"loss": [], "step": []}}

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        _ = train(train_queue, valid_queue, model, architect, criterion,
                  optimizer, lr, loggers)

        # validation
        infer_loss = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], infer_loss, None, model.clock)

        utils.plot_loss_acc(loggers, args.save)

        model.update_history()

        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reduce'))

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))
    np.save(os.path.join(args.save, 'normal_weight.npy'),
            F.softmax(model.alphas_normal, dim=-1).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            F.softmax(model.alphas_reduce, dim=-1).data.cpu().numpy())

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
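# A minimal sketch, not the project's actual implementation, of the bilevel
# `train` helper that Variant 1's main() expects: each step first updates the
# architecture parameters on a held-out batch via the Architect, then updates
# the network weights on a training batch. `args.unrolled` and `args.grad_clip`
# are assumptions borrowed from the standard DARTS argument list, and the
# advancement of `model.clock` is left to the model itself.
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, loggers):
    model.train()
    loss = None
    for step, (x, y) in enumerate(train_queue):
        x, y = x.cuda(non_blocking=True), y.cuda(non_blocking=True)

        # architecture step on a validation batch (second-order when unrolled)
        x_val, y_val = next(iter(valid_queue))
        x_val, y_val = x_val.cuda(non_blocking=True), y_val.cuda(non_blocking=True)
        architect.step(x, y, x_val, y_val, lr, optimizer, unrolled=args.unrolled)

        # weight step on the training batch
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        utils.log_loss(loggers["train"], loss.item(), None, model.clock)
    return loss.item() if loss is not None else float('nan')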
# Variant 2: CIFAR-10 classification search with optional Noise DARTS,
# random architecture generation, and reweighting, toggled through
# SearchControllerConf.
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()

    # Noise DARTS
    if args.noise_darts:
        SearchControllerConf['noise_darts']['noise_type'] = args.noise_type
        SearchControllerConf['noise_darts']['T_max'] = args.max_step
    else:
        SearchControllerConf['noise_darts'] = None

    # Random DARTS
    if args.random_search:
        SearchControllerConf['random_search']['num_identity'] = args.num_identity
        SearchControllerConf['random_search']['num_arch'] = args.num_arch
        SearchControllerConf['random_search']['flops_threshold'] = args.flops_threshold
    else:
        SearchControllerConf['random_search'] = None

    # Reweight DARTS
    SearchControllerConf['reweight'] = args.reweight

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    if args.random_search:
        genotype_list = model.random_generate()
        logging.info('genotype list = %s', genotype_list)
        logging.info('generate done!')
        sys.exit(0)

    model_optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # single-level architecture optimizer
    arch_optimizer = torch.optim.Adam(
        model.arch_parameters(),
        lr=args.arch_learning_rate,
        betas=(0.9, 0.999),
        weight_decay=args.arch_weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        model.update_history()

        # training and architecture search
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, model_optimizer, lr, epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reduce'))
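# A minimal sketch, under the assumption that the repository ships the
# DARTS-style `utils.AvgrageMeter` and `utils.accuracy` helpers, of the
# `infer` routine called by the two classification variants: a plain
# evaluation loop that returns top-1 accuracy and the average loss.
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for x, y in valid_queue:
            x = x.cuda(non_blocking=True)
            y = y.cuda(non_blocking=True)
            logits = model(x)
            loss = criterion(logits, y)
            prec1, = utils.accuracy(logits, y, topk=(1,))
            n = x.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
    return top1.avg, objs.avg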
# Variant 3: CIFAR-10 search with separate-loss criteria, sigmoid-activated
# architecture weights, and checkpoint resume.
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    run_start = time.time()
    start_epoch = 0
    dur_time = 0

    criterion_train = (ConvSeparateLoss(weight=args.aux_loss_weight)
                       if args.sep_loss == 'l2'
                       else TriSeparateLoss(weight=args.aux_loss_weight))
    criterion_val = nn.CrossEntropyLoss()

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion_train,
                    steps=4, multiplier=4, stem_multiplier=3,
                    parse_method=args.parse_method, op_threshold=args.op_threshold)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    model_optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    arch_optimizer = torch.optim.Adam(
        model.arch_parameters(),
        lr=args.arch_learning_rate,
        betas=(0.9, 0.999),
        weight_decay=args.arch_weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    architect = Architect(model, args)

    # resume from checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            logging.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            dur_time = checkpoint['dur_time']
            model_optimizer.load_state_dict(checkpoint['model_optimizer'])
            architect.optimizer.load_state_dict(checkpoint['arch_optimizer'])
            model.restore(checkpoint['network_states'])
            logging.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, start_epoch))
        else:
            logging.info("=> no checkpoint found at '{}'".format(args.resume))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer, float(args.epochs), eta_min=args.learning_rate_min,
        last_epoch=-1 if start_epoch == 0 else start_epoch)
    if args.resume and os.path.isfile(args.resume):
        scheduler.load_state_dict(checkpoint['scheduler'])

    for epoch in range(start_epoch, args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        logging.info(torch.sigmoid(model.alphas_normal))
        logging.info(torch.sigmoid(model.alphas_reduce))

        model.update_history()

        # training and architecture search
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion_train, model_optimizer, arch_optimizer)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion_val)
        logging.info('valid_acc %f', valid_acc)

        # save checkpoint
        utils.save_checkpoint({
            'epoch': epoch + 1,
            'dur_time': dur_time + time.time() - run_start,
            'scheduler': scheduler.state_dict(),
            'model_optimizer': model_optimizer.state_dict(),
            'arch_optimizer': architect.optimizer.state_dict(),
            'network_states': model.states(),
        }, is_best=False, save=args.save)
        logging.info('save checkpoint (epoch %d) in %s dur_time: %s',
                     epoch, args.save,
                     utils.calc_time(dur_time + time.time() - run_start))

        # save operation weights as fig
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reduce'))

    # save last operations
    np.save(os.path.join(args.save, 'normal_weight.npy'),
            torch.sigmoid(model.alphas_normal).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            torch.sigmoid(model.alphas_reduce).data.cpu().numpy())
    logging.info('save last weights done')
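# A minimal sketch, assuming DARTS-style utilities, of the `utils.save_checkpoint`
# helper that Variant 3's resume logic relies on: it serializes the state dict
# into the run directory and, when is_best is set, copies it to a separate
# best-model file. The file names are conventions borrowed from the original
# DARTS utils, not confirmed by this source.
import shutil

def save_checkpoint(state, is_best, save, filename='checkpoint.pth.tar'):
    path = os.path.join(save, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(save, 'model_best.pth.tar'))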