def evaluation(sample, name):
    geno = eval(convert_sample_to_genotype(sample))
    logger.info('Model sample: {}'.format(sample))
    logger.info('Genotype: {}'.format(str(geno)))

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        'cifar10', args.data_path, config['image_size'], config['cutout_length'],
        validation=True)

    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = True

    # change size of input image
    input_size = config['image_size']
    model = AugmentCNN(input_size, input_channels, config['init_channels'], 10,
                       config['layers'], True, geno)

    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    model = nn.DataParallel(model, device_ids=[0]).to(device)

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(), config['lr'],
                                momentum=0.9, weight_decay=3e-4)

    # data loaders
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=config['batch_size'],
                                               shuffle=True, num_workers=4, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config['batch_size'],
                                               shuffle=True, num_workers=4, pin_memory=True)

    # lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config['epochs'])

    best_top1 = 0.
    len_train_loader = len(train_loader)

    # training loop
    for epoch in range(config['epochs']):
        lr_scheduler.step()
        drop_prob = 0.2 * epoch / config['epochs']
        model.module.drop_path_prob(drop_prob, config['fp'])

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len_train_loader
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # track best accuracy
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        # utils.save_checkpoint(model, config.path, is_best)

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    return best_top1, geno
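# A minimal sketch of how evaluation() might be driven from an outer search loop.
# `num_trials` and `sampler.sample()` are hypothetical names, not part of the source;
# the only contract used above is that convert_sample_to_genotype() returns a string
# that eval() turns into a Genotype accepted by AugmentCNN.
def search_loop_sketch(sampler, num_trials):
    best_acc, best_geno = -1., None
    for i in range(num_trials):
        sample = sampler.sample()  # hypothetical architecture sampler
        top1, geno = evaluation(sample, name='trial{}'.format(i))
        if top1 > best_acc:
            best_acc, best_geno = top1, geno
    logger.info('Best genotype: {} ({:.4%})'.format(best_geno, best_acc))
    return best_geno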
def main():
    logger.info("Logger is set - training start")
    logger.info("Torch version is: {}".format(torch.__version__))
    logger.info("Torch_vision version is: {}".format(torchvision.__version__))

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    # use deterministic cuDNN settings
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, config.cutout_length, validation=True)

    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes,
                       config.layers, use_aux, config.genotype)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(), config.lr, momentum=config.momentum,
                                weight_decay=config.weight_decay)

    # get data loader
    if config.data_loader_type == 'Torch':
        train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=config.batch_size, shuffle=True,
            num_workers=config.workers, pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(
            valid_data, batch_size=config.batch_size, shuffle=False,
            num_workers=config.workers, pin_memory=True)
    elif config.data_loader_type == 'DALI':
        config.dataset = config.dataset.lower()
        if config.dataset == 'cifar10':
            from DataLoaders_DALI import cifar10
            train_loader = cifar10.get_cifar_iter_dali(
                type='train', image_dir=config.data_path,
                batch_size=config.batch_size, num_threads=config.workers)
            valid_loader = cifar10.get_cifar_iter_dali(
                type='val', image_dir=config.data_path,
                batch_size=config.batch_size, num_threads=config.workers)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)

    best_top1 = 0.
    if config.data_loader_type == 'DALI':
        len_train_loader = get_train_loader_len(config.dataset.lower(), config.batch_size,
                                                is_train=True)
    else:
        len_train_loader = len(train_loader)

    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len_train_loader
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
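# DALI iterators yield batches in a different shape than torch DataLoaders, so the
# train()/validate() loops typically have to branch on the loader type. A minimal sketch,
# assuming get_cifar_iter_dali() wraps a DALIClassificationIterator (this unpacking is an
# assumption about those loaders, not code from this repository):
def unpack_batch(batch, loader_type, device):
    if loader_type == 'DALI':
        # DALI returns a list with one dict per pipeline output
        X = batch[0]["data"].cuda(non_blocking=True)
        y = batch[0]["label"].squeeze().long().cuda(non_blocking=True)
    else:
        X, y = batch
        X = X.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)
    return X, y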
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, config.cutout_length, validation=True)

    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes,
                       config.layers, use_aux, config.genotype)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(), config.lr, momentum=config.momentum,
                                weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size,
                                               shuffle=True, num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size,
                                               shuffle=False, num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)

    best_top1 = 0.

    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        timebudget.report()
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
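# timebudget.report() above only prints something useful if the expensive functions are
# decorated. A minimal sketch of the assumed setup with the `timebudget` package
# (decorating train/validate is an assumption; the decoration is not shown in this file):
from timebudget import timebudget

@timebudget  # records wall-clock time per call under the function's name
def expensive_step(batch):
    ...

# after some calls, print cumulative times and call counts:
timebudget.report()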
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    # input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
    #     config.dataset, config.data_path, config.cutout_length, validation=True)
    input_size, input_channels, n_classes, train_data, test_dat, val_dat = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=True,
        validation2=True)
    print('input_size', input_size)

    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.

    # from evaluate
    # model = SearchCNNController(input_channels, config.init_channels, n_classes,
    #                             config.layers, net_crit, device_ids=config.gpus)
    model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes,
                       config.layers, use_aux, config.genotype)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(), config.lr, momentum=config.momentum,
                                weight_decay=config.weight_decay)

    # split data to train/validation
    best_top1 = 0.
    best_top_overall = -999
    n_train = len(train_data)
    n_val = len(val_dat)
    n_test = len(test_dat)
    split = n_train // 2
    indices1 = list(range(n_train))
    indices2 = list(range(n_val))
    indices3 = list(range(n_test))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices1)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices2)
    test_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices3)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(val_dat, batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_dat, batch_size=config.batch_size,
                                              sampler=test_sampler,
                                              num_workers=config.workers, pin_memory=True)
    """
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size,
                                               shuffle=True, num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size,
                                               shuffle=False, num_workers=config.workers,
                                               pin_memory=True)
    """
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)
    # lambda1 = lambda epoch: 0.95 ** epoch
    # lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

    best_top1 = 0.

    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        # utils.save_checkpoint(model, config.path, is_best)
        utils.save_checkpoint2(model, epoch, optimizer, criterion, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
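# utils.save_checkpoint2 is not shown in this section. A minimal sketch of what such a
# helper could look like, assuming it mirrors utils.save_checkpoint but also persists the
# epoch and optimizer state for resuming; the file names and state keys below are
# assumptions, not the repository's actual implementation:
import os
import shutil
import torch

def save_checkpoint2(model, epoch, optimizer, criterion, ckpt_dir, is_best=False):
    # criterion is accepted for interface parity; CrossEntropyLoss has no state to save
    state = {
        'epoch': epoch,
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
    }
    filename = os.path.join(ckpt_dir, 'checkpoint.pth.tar')
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(ckpt_dir, 'best.pth.tar'))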
def evaluate(self, trial_no, trial_hyperparams):
    """Evaluates the objective function.

    Trains the child model k times with the same augmentation hyperparameters,
    where k is set by the user through the `opt_samples` argument.

    Args:
        trial_no (int): trial number, needed for recording to the notebook
        trial_hyperparams (list)
    Returns:
        float: trial cost = 1 - avg. reward over samples
    """
    augmented_data = augment_by_policy(
        self.data["X_train"], self.data["y_train"], *trial_hyperparams
    )

    sample_rewards = []

    # pytorch hyperparameters
    layers = 2
    init_channels = 24
    use_aux = True
    epochs = 30
    lr = 0.01
    momentum = 0.995
    weight_decay = 0.995
    drop_path_prob = 0.2
    genotype = (
        "Genotype(normal=[[('dil_conv_3x3', 0), ('sep_conv_5x5', 1)], "
        "[('sep_conv_3x3', 1), ('avg_pool_3x3', 0)], "
        "[('dil_conv_3x3', 1), ('dil_conv_3x3', 0)], "
        "[('sep_conv_3x3', 3), ('skip_connect', 1)]], "
        "normal_concat=range(2, 6), "
        "reduce=[[('sep_conv_3x3', 1), ('dil_conv_5x5', 0)], "
        "[('skip_connect', 0), ('sep_conv_5x5', 1)], "
        "[('sep_conv_5x5', 1), ('sep_conv_5x5', 0)], "
        "[('max_pool_3x3', 1), ('sep_conv_3x3', 0)]], "
        "reduce_concat=range(2, 6))"
    )

    model = AugmentCNN(self.input_size, self.input_channels, init_channels,
                       self.n_classes, layers, use_aux, genotype)
    # device_ids must be a list of device ints, not a string
    model = nn.DataParallel(model, device_ids=[0]).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum,
                                weight_decay=weight_decay)
    # lr scheduler (the loop below steps it, so it must be created here)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)

    a = 2 / 0  # raises ZeroDivisionError, aborting the trial before the training loop (debug leftover)

    """
    for sample_no in range(1, self.opt_samples + 1):
        self.child_model.load_pre_augment_weights()
        # TRAIN
        history = self.child_model.fit(self.data, augmented_data)
        #
        reward = self.calculate_reward(history)
        sample_rewards.append(reward)
        self.notebook.record(
            trial_no, trial_hyperparams, sample_no, reward, history
        )
    """

    best_top1 = -9999
    for epoch in range(epochs):
        lr_scheduler.step()
        drop_prob = drop_path_prob * epoch / epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # track best accuracy
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        print('best_top1:', best_top1)

        # sample_rewards.append(reward)
        # self.notebook.record(
        #     trial_no, trial_hyperparams, sample_no, reward, history
        # )

    # trial_cost = 1 - np.mean(sample_rewards)
    # self.notebook.save()
    # log best_top1 here since trial_cost is commented out above
    log_and_print(
        f"{str(trial_no)}, {str(best_top1)}, {str(trial_hyperparams)}",
        self.logging,
    )
    # return trial_cost
    return best_top1
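# A minimal sketch of how an outer search controller might call evaluate(); the
# `controller` object, its ask/tell interface, and `n_trials` are assumptions for
# illustration only, not the repository's actual optimizer:
def search_sketch(objective, controller, n_trials):
    for trial_no in range(1, n_trials + 1):
        trial_hyperparams = controller.ask()           # hypothetical: propose an augmentation policy
        score = objective.evaluate(trial_no, trial_hyperparams)
        controller.tell(trial_hyperparams, 1 - score)  # minimize 1 - accuracy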
def worker(gpu, ngpus_per_node, config_in):
    # init
    config = copy.deepcopy(config_in)
    jobid = os.environ["SLURM_JOBID"]
    procid = int(os.environ["SLURM_PROCID"])
    config.gpu = gpu

    if config.gpu is not None:
        writer_name = "tb.{}-{:d}-{:d}".format(jobid, procid, gpu)
        logger_name = "{}.{}-{:d}-{:d}.aug.log".format(config.name, jobid, procid, gpu)
        ploter_name = "{}-{:d}-{:d}".format(jobid, procid, gpu)
        ck_name = "{}-{:d}-{:d}".format(jobid, procid, gpu)
    else:
        writer_name = "tb.{}-{:d}-all".format(jobid, procid)
        logger_name = "{}.{}-{:d}-all.aug.log".format(config.name, jobid, procid)
        ploter_name = "{}-{:d}-all".format(jobid, procid)
        ck_name = "{}-{:d}-all".format(jobid, procid)

    writer = SummaryWriter(log_dir=os.path.join(config.path, writer_name))
    writer.add_text('config', config.as_markdown(), 0)
    logger = utils.get_logger(os.path.join(config.path, logger_name))
    config.print_params(logger.info)

    # get cuda device
    device = torch.device('cuda', gpu)

    # begin
    logger.info("Logger is set - training start")
    if config.dist_url == "env://" and config.rank == -1:
        config.rank = int(os.environ["RANK"])
    if config.mp_dist:
        # For multiprocessing distributed training, rank needs to be the
        # global rank among all the processes
        config.rank = config.rank * ngpus_per_node + gpu
    # print('back:{}, dist_url:{}, world_size:{}, rank:{}'.format(
    #     config.dist_backend, config.dist_url, config.world_size, config.rank))
    dist.init_process_group(backend=config.dist_backend, init_method=config.dist_url,
                            world_size=config.world_size, rank=config.rank)

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, config.cutout_length, validation=True)

    # build model
    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes,
                       config.layers, use_aux, config.genotype)

    if config.gpu is not None:
        torch.cuda.set_device(config.gpu)
        # model = model.to(device)
        model.cuda(config.gpu)
        # When using a single GPU per process and per DistributedDataParallel, we need to
        # divide the batch size ourselves based on the total number of GPUs we have
        config.batch_size = int(config.batch_size / ngpus_per_node)
        config.workers = int((config.workers + ngpus_per_node - 1) / ngpus_per_node)
        # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.rank])
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.gpu])
        # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=None, output_device=None)
    else:
        model.cuda()
        # DistributedDataParallel will divide and allocate batch_size to all
        # available GPUs if device_ids are not set
        model = torch.nn.parallel.DistributedDataParallel(model)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(), config.lr, momentum=config.momentum,
                                weight_decay=config.weight_decay)

    # load data
    train_sampler = data.distributed.DistributedSampler(
        train_data, num_replicas=config.world_size, rank=config.rank)
    valid_sampler = data.distributed.DistributedSampler(
        valid_data, num_replicas=config.world_size, rank=config.rank)
    train_loader = data.DataLoader(train_data, batch_size=config.batch_size,
                                   sampler=train_sampler, shuffle=False,
                                   num_workers=config.workers, pin_memory=True)
    valid_loader = data.DataLoader(valid_data, batch_size=config.batch_size,
                                   sampler=valid_sampler, shuffle=False,
                                   num_workers=config.workers, pin_memory=True)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)

    # setting the privacy protecting procedure
    if config.dist_privacy:
        logger.info("PRIVACY ENGINE OFF")

    best_top1 = 0.

    # training loop
    for epoch in range(config.epochs):
        # lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(logger, writer, device, config, train_loader, model, optimizer, criterion, epoch)
        lr_scheduler.step()

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(logger, writer, device, config, valid_loader, model, criterion,
                        epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, ck_name, 'aug', is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
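# A minimal sketch of how worker() might be launched; the entry point name and the
# world-size arithmetic are assumptions. worker() reads SLURM_JOBID/SLURM_PROCID, so this
# is expected to run inside a SLURM job. With mp_dist enabled, one process is spawned per
# local GPU and torch.multiprocessing passes the local gpu index as the first argument:
import torch.multiprocessing as mp

def launch(config):
    ngpus_per_node = torch.cuda.device_count()
    if config.mp_dist:
        # total world size = processes per node * number of nodes
        config.world_size = ngpus_per_node * config.world_size
        mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, config))
    else:
        worker(config.gpu, ngpus_per_node, config)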
def main():
    logger.info("Logger is set - training start")
    fileRoot = r'/home/hlu/Data/VIPL'
    saveRoot = r'/home/hlu/Data/VIPL_STMap' + str(config.fold_num) + str(config.fold_index)
    n_classes = 1
    input_channels = 3
    input_size = np.array([64, 300])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    toTensor = transforms.ToTensor()
    resize = transforms.Resize(size=(64, 300))

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    # net acc
    torch.backends.cudnn.benchmark = True

    # get data with meta info
    if config.reData == 1:
        test_index, train_index = MyDataset.CrossValidation(fileRoot, fold_num=config.fold_num,
                                                            fold_index=config.fold_index)
        Train_Indexa = MyDataset.getIndex(fileRoot, train_index, saveRoot + '_Train',
                                          'STMap_YUV_Align_CSI_POS.png', 15, 300)
        Test_Indexa = MyDataset.getIndex(fileRoot, test_index, saveRoot + '_Test',
                                         'STMap_YUV_Align_CSI_POS.png', 15, 300)
    train_data = MyDataset.Data_STMap(root_dir=(saveRoot + '_Train'), frames_num=300,
                                      transform=transforms.Compose([resize, toTensor, normalize]))
    valid_data = MyDataset.Data_STMap(root_dir=(saveRoot + '_Test'), frames_num=300,
                                      transform=transforms.Compose([resize, toTensor, normalize]))
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.batch_size,
                                               shuffle=True, num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config.batch_size,
                                               shuffle=False, num_workers=config.workers,
                                               pin_memory=True)

    # loss
    criterion = nn.L1Loss().to(device)

    # net
    Model_name = config.name + 'fn' + str(config.fold_num) + 'fi' + str(config.fold_index)
    use_aux = config.aux_weight > 0.
    if config.reTrain == 1:
        model = torch.load(os.path.join(config.path, Model_name + 'best.pth.tar'),
                           map_location=device)
        print('load ' + Model_name + ' right')
        model = nn.DataParallel(model, device_ids=config.gpus).to(device)
    else:
        model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes,
                           config.layers, use_aux, config.genotype)
        model._init_weight()
        model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.Adam(model.parameters(), config.lr)

    best_losses = 10

    # training loop
    for epoch in range(config.epochs):
        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        best_losses = validate(valid_loader, model, criterion, epoch, cur_step, best_losses)

    logger.info("Final best Losses@1 = {:.4%}".format(best_losses))