import os
import shutil
from importlib import import_module

import numpy as np
import torch
from tensorboardX import SummaryWriter  # or: from torch.utils.tensorboard import SummaryWriter

# project-local modules (assumed layout)
import utils
from architect import Architect
from config import SearchConfig


def run(args, myargs):
    my_config = getattr(myargs.config, args.command)
    config = SearchConfig()

    # Copy command-line args into the config; they must not clash with
    # attributes SearchConfig already defines.
    for k, v in args.items():
        assert not hasattr(config, k)
        setattr(config, k, v)

    # Overlay the per-command config, warning about unknown keys.
    for k, v in my_config.items():
        if not hasattr(config, k):
            print('* config does not have %s' % k)
        setattr(config, k, v)

    device = torch.device("cuda")

    writer = myargs.writer
    writer.add_text('all_config', config.as_markdown(), 0)

    logger = myargs.logger
    config.print_params(logger.info_msg)

    config.data_path = os.path.expanduser(config.data_path)
    config.plot_path = os.path.join(args.outdir, 'plot')
    config.path = args.outdir

    main(config=config, logger=logger, device=device, myargs=myargs)
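# A minimal sketch of how `run` might be invoked (an assumption: the real
# CLI parser and the `myargs` container, with its `config`, `writer`, and
# `logger` attributes, are built elsewhere in the project; `parse_args` is
# a hypothetical helper):
#
#     if __name__ == '__main__':
#         args, myargs = parse_args()
#         run(args, myargs)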
def main():
    config = SearchConfig(section='fine-tune')
    device = torch.device("cuda")

    # tensorboard
    writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}_tune.log".format(config.name)))
    config.print_params(logger.info)

    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=True)

    # load the best checkpoint from the search phase and prune it
    logger.debug('loading checkpoint')
    best_path = os.path.join(config.path, 'best.pth.tar')
    model = torch.load(best_path)
    model.prune()
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(), config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    model.print_alphas(logger)
    # baseline accuracy before any fine-tuning
    first_top1 = validate(valid_loader, model, -1, 0, device, config, logger,
                          writer)
    os.makedirs(config.fine_tune_path, exist_ok=True)

    # training loop
    best_top1 = 0.
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, model, architect, w_optim, lr, epoch, writer,
              device, config, logger)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step, device, config,
                        logger, writer)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.fine_tune_path, is_best)
        print("")

    logger.info("Initial best Prec@1 = {:.4%}".format(first_top1))
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
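# `best.pth.tar` loaded above is produced during the search phase by
# `utils.save_checkpoint`. That helper is not shown here; a plausible
# implementation (an assumption, not necessarily the project's exact code)
# saves the whole model object and copies it when it is the best so far,
# which matches the bare `torch.load(best_path)` above:
#
#     def save_checkpoint(model, ckpt_dir, is_best=False):
#         filename = os.path.join(ckpt_dir, 'checkpoint.pth.tar')
#         torch.save(model, filename)
#         if is_best:
#             shutil.copyfile(filename,
#                             os.path.join(ckpt_dir, 'best.pth.tar'))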
config = SearchConfig()

device = torch.device("cuda")

# tensorboard
writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True
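# `utils.get_logger` is assumed to return a standard `logging.Logger`
# writing to both the given file and the console; a minimal sketch of such
# a helper (an assumption, not the project's exact code):
#
#     import logging
#
#     def get_logger(file_path):
#         logger = logging.getLogger('search')
#         formatter = logging.Formatter('%(asctime)s | %(message)s')
#         for handler in (logging.FileHandler(file_path),
#                         logging.StreamHandler()):
#             handler.setFormatter(formatter)
#             logger.addHandler(handler)
#         logger.setLevel(logging.INFO)
#         return logger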
def main():
    config = SearchConfig()
    device = torch.device("cuda")

    # tensorboard (start from a clean log dir)
    tb_path = os.path.join(config.path, "tb")
    shutil.rmtree(tb_path, ignore_errors=True)
    writer = SummaryWriter(log_dir=tb_path)
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}_train.log".format(config.name)))
    config.print_params(logger.info)

    logger.info("Logger is set - training start")

    # profile mode: run a single shortened epoch under the autograd profiler
    if int(config.profile) != 0:
        logger.info('entering profile mode')
        profile = True
        config.epochs = 1
        max_batches = config.print_freq
    else:
        profile = False
        max_batches = None

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    # instantiate the controller class named in the config,
    # e.g. "package.module.ClassName"
    module_name, class_name = config.controller_class.rsplit('.', 1)
    controller_cls = getattr(import_module(module_name), class_name)
    model = controller_cls(device, **config.__dict__)
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(), config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(), config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = int(n_train * config.validate_split)
    indices = list(range(n_train))
    if split <= 0:
        logger.debug('using train as validation')
        valid_sampler = train_sampler = \
            torch.utils.data.sampler.SubsetRandomSampler(indices)
    else:
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
            indices[:split])
        valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best = 0
    best_genotype = None
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        if profile:
            with torch.autograd.profiler.profile(use_cuda=True) as prof:
                train_qual = train(train_loader, valid_loader, model,
                                   architect, w_optim, alpha_optim, lr, epoch,
                                   writer, device, config, logger,
                                   max_batches=max_batches)
            print('cpu')
            print(prof.key_averages().table(sort_by="cpu_time_total",
                                            row_limit=10))
            print(prof.key_averages().table(sort_by="cpu_time", row_limit=10))
            print('cuda')
            print(prof.key_averages().table(sort_by="cuda_time_total",
                                            row_limit=10))
            print(prof.key_averages().table(sort_by="cuda_time",
                                            row_limit=10))
            break

        model.new_epoch(epoch, writer)
        train_qual = train(train_loader, valid_loader, model, architect,
                           w_optim, alpha_optim, lr, epoch, writer, device,
                           config, logger, max_batches=max_batches)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        val_qual = validate(valid_loader, model, epoch, cur_step, device,
                            config, logger, writer)

        # log the discovered genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as an image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        model.plot_genotype(plot_path, caption)
        # plot(genotype.normal, plot_path + "-normal", caption)
        # plot(genotype.reduce, plot_path + "-reduce", caption)

        # pick the quality metric used for model selection
        if config.use_train_quality != 0:
            cur_qual = train_qual
        else:
            cur_qual = val_qual

        # save
        if best < cur_qual:
            best = cur_qual
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        logger.info("Quality{}: {}\n\n".format('*' if is_best else '',
                                               cur_qual))

    logger.info("Final best = {}".format(best))
    logger.info("Best Genotype = {}".format(best_genotype))
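# Standard entry point, assuming this search script is executed directly.
if __name__ == '__main__':
    main()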