def main(config):
    # ensure directories are setup
    prepare_dirs(config)

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    # instantiate data loaders
    if config.is_train:
        data_loader = get_train_valid_loader(config.data_dir,
                                             config.batch_size,
                                             config.random_seed,
                                             config.valid_size,
                                             config.shuffle,
                                             config.show_sample,
                                             **kwargs)
    else:
        data_loader = get_test_loader(config.data_dir,
                                      config.batch_size,
                                      **kwargs)

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # either train
    if config.is_train:
        save_config(config)
        trainer.train()
    # or load a pretrained model and test
    else:
        trainer.test()
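# A minimal sketch (an assumption, not this repo's implementation) of the
# `get_train_valid_loader` helper the snippets in this file call: carve one
# training set into train/valid loaders with `SubsetRandomSampler`, matching
# the (data_dir, batch_size, random_seed, valid_size, shuffle, show_sample)
# signature above. MNIST is illustrative; `show_sample` plotting is omitted.
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms


def get_train_valid_loader(data_dir, batch_size, random_seed,
                           valid_size=0.1, shuffle=True,
                           show_sample=False, **kwargs):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_set = datasets.MNIST(data_dir, train=True, download=True,
                               transform=transform)
    valid_set = datasets.MNIST(data_dir, train=True, download=True,
                               transform=transform)

    # split the indices into a training and a validation portion
    num_train = len(train_set)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_idx, valid_idx = indices[split:], indices[:split]

    train_loader = DataLoader(train_set, batch_size=batch_size,
                              sampler=SubsetRandomSampler(train_idx), **kwargs)
    valid_loader = DataLoader(valid_set, batch_size=batch_size,
                              sampler=SubsetRandomSampler(valid_idx), **kwargs)
    return train_loader, valid_loader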
def _validate_one_epoch(self, model):
    """
    Evaluate the model on the validation set.
    """
    model.eval()

    # setup valid loader
    if self.data_loader is None:
        space = self.optim_params['batch_size']
        batch_size = sample_from(space)
        self.data_loader = get_train_valid_loader(
            self.data_dir, self.args.name, batch_size,
            self.args.valid_size, self.args.shuffle, **self.kwargs)

    val_loader = self.data_loader[1]
    num_valid = len(val_loader.sampler.indices)

    val_loss = 0.
    for i, (x, y) in enumerate(val_loader):
        if self.num_gpu > 0:
            x, y = x.cuda(), y.cuda()
        x = x.view(x.size(0), -1)
        x, y = Variable(x), Variable(y)

        output = model(x)
        # sum (rather than average) the per-sample losses here, then divide
        # by the full validation set size below (pre-0.4 PyTorch idiom)
        val_loss += F.nll_loss(output, y, size_average=False).data[0]

    val_loss /= num_valid
    return val_loss
def test_glimpse():
    config, unparsed = get_config()
    train_loader, _ = get_train_valid_loader(config.data_dir,
                                             config.batch_size,
                                             config.random_seed,
                                             config.valid_size,
                                             config.shuffle,
                                             config.show_sample)
    img, label = next(iter(train_loader))

    r = retina(g=8, k=1, s=1)
    g = glimpse_network(block=BasicBlock, h_g=128, h_l=32, h_s=128,
                        g=8, k=1, s=1, c=3)

    l_t_prev = torch.rand(config.batch_size, 2).uniform_(-1, 1)
    size_t_prev = torch.rand(config.batch_size, 5).uniform_(0, 1)

    # print(g.feature_extractors["size_32"])
    g_t = g(img, l_t_prev, size_t_prev)
    return g_t
def main(config):
    # ensure directories are setup
    prepare_dirs(config)

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if torch.cuda.is_available():
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 4, 'pin_memory': False}

    # instantiate data loaders
    if config.is_train:
        data_loader = get_train_valid_loader(config.data_dir,
                                             config.dataset,
                                             config.batch_size,
                                             config.random_seed,
                                             config.exp,
                                             config.valid_size,
                                             config.shuffle,
                                             **kwargs)
    else:
        data_loader = get_test_loader(config.data_dir,
                                      config.dataset,
                                      config.batch_size,
                                      config.exp,
                                      config.familiar,
                                      **kwargs)

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    if config.is_train:
        trainer.train()
    else:
        if config.attack:
            trainer.test_attack()
        else:
            trainer.test()
def main(config):
    utils.prepare_dirs(config)

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {"num_workers": 1, "pin_memory": True}

    # instantiate data loaders
    if config.is_train:
        dloader = data_loader.get_train_valid_loader(
            config.data_dir,
            config.batch_size,
            config.random_seed,
            config.valid_size,
            config.shuffle,
            config.show_sample,
            **kwargs,
        )
    else:
        dloader = data_loader.get_test_loader(
            config.data_dir,
            config.batch_size,
            **kwargs,
        )

    trainer = Trainer(config, dloader)

    # either train
    if config.is_train:
        utils.save_config(config)
        trainer.train()
    # or load a pretrained model and test
    else:
        trainer.test()
def __init__(self, args, model, params):
    """
    Initialize the Hyperband object.

    Args
    ----
    - args: object containing command line arguments.
    - model: the `Sequential()` model you wish to tune.
    - params: a dictionary where the key is the hyperparameter
      to tune, and the value is the space from which to randomly
      sample it.
    """
    self.args = args
    self.model = model
    self._parse_params(params)

    # hyperband params
    self.epoch_scale = args.epoch_scale
    self.max_iter = args.max_iter
    self.eta = args.eta
    self.s_max = int(np.log(self.max_iter) / np.log(self.eta))
    self.B = (self.s_max + 1) * self.max_iter

    print(
        "[*] max_iter: {}, eta: {}, B: {}".format(
            self.max_iter, self.eta, self.B
        )
    )

    # misc params
    self.data_dir = args.data_dir
    self.ckpt_dir = args.ckpt_dir
    self.num_gpu = args.num_gpu
    self.print_freq = args.print_freq

    # device
    # self.device = torch.device("cuda" if self.num_gpu > 0 else "cpu")
    self.device = torch.device("cpu")

    # data params
    self.data_loader = None
    self.kwargs = {}
    if self.num_gpu > 0:
        self.kwargs = {'num_workers': 1, 'pin_memory': True}
    if 'batch_size' not in self.optim_params:
        self.batch_hyper = False
        # self.data_loader = get_train_valid_loader(
        #     args.data_dir, args.name, args.batch_size,
        #     args.valid_size, args.shuffle, **self.kwargs
        # )
        self.data_loader = get_train_valid_loader(
            args.batch_size, args.valid_size,
            args.shuffle, **self.kwargs
        )

    # optim params
    self.def_optim = args.def_optim
    self.def_lr = args.def_lr
    self.patience = args.patience
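# Illustrative sketch (an assumption, not code from this repo) of the outer
# Hyperband loop that `s_max`, `B`, and `eta` above parameterize. Each bracket
# `s` starts `n` random configs at `r` iterations apiece and repeatedly keeps
# the best 1/eta fraction; `get_random_config` and `run_config` stand in for
# this class's sampling and training methods.
import numpy as np


def hyperband(get_random_config, run_config, max_iter=81, eta=3):
    s_max = int(np.log(max_iter) / np.log(eta))
    B = (s_max + 1) * max_iter
    best_loss, best_config = np.inf, None
    for s in reversed(range(s_max + 1)):
        # initial number of configs and iterations per config for this bracket
        n = int(np.ceil((B / max_iter) * (eta ** s) / (s + 1)))
        r = max_iter * (eta ** (-s))
        configs = [get_random_config() for _ in range(n)]
        for i in range(s + 1):
            if not configs:
                break
            n_i = int(n * eta ** (-i))
            r_i = int(r * eta ** i)
            losses = [run_config(c, r_i) for c in configs]
            j = int(np.argmin(losses))
            if losses[j] < best_loss:
                best_loss, best_config = losses[j], configs[j]
            # successive halving: keep the top 1/eta fraction
            order = np.argsort(losses)
            configs = [configs[k] for k in order[:n_i // eta]]
    return best_loss, best_config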
def main(config):
    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.cuda:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    scores = []
    times = []
    for i in [1, 2, 3]:  # three cross-validation splits
        start = time.time()
        count = i
        train_data, test_data = load_dataset(config.data_dir, str(count))

        # instantiate data loaders
        data_loader = get_train_valid_loader(train_data,
                                             config.batch_size,
                                             config.random_seed,
                                             config.valid_size,
                                             config.shuffle,
                                             config.show_sample,
                                             **kwargs)
        test_loader = get_test_loader(test_data, config.batch_size, **kwargs)

        # instantiate trainer
        trainer = Trainer(config, count, data_loader, test_loader)
        trainer.train()
        result = trainer.test()
        scores.append(result)
        elapsed = time.time() - start
        times.append(elapsed)

    scores = np.array(scores)
    times = np.array(times)
    print('>>> scores:', scores)
    print('average time:', times.mean())
    # score columns: acc, f1, prec, rec
    print('>>> mean')
    print('acc:', scores.mean(axis=0)[0],
          '\nf1:', scores.mean(axis=0)[1],
          '\nprec:', scores.mean(axis=0)[2],
          '\nrec:', scores.mean(axis=0)[3])
    print('>>> std')
    print('acc:', scores.std(axis=0)[0],
          '\nf1:', scores.std(axis=0)[1],
          '\nprec:', scores.std(axis=0)[2],
          '\nrec:', scores.std(axis=0)[3])
def __init__(self):
    trainloader, validate_loader = data_loader.get_train_valid_loader(
        '/home/yanan/train_data', batch_size=128, augment=True,
        valid_size=0.1, shuffle=True, random_seed=2312390,
        show_sample=False, num_workers=1, pin_memory=True)
    # testloader = data_loader.get_test_loader('/home/yanan/train_data',
    #     batch_size=128, shuffle=False, num_workers=1, pin_memory=True)

    net = EvoCNNModel()
    cudnn.benchmark = True
    net = net.cuda()
    criterion = nn.CrossEntropyLoss()
    best_acc = 0.0

    self.net = net
    self.criterion = criterion
    self.best_acc = best_acc
    self.trainloader = trainloader
    self.validate_loader = validate_loader
    self.file_id = os.path.basename(__file__).split('.')[0]
def read_dataset(self):
    self.train_loader, self.valid_loader = get_train_valid_loader(
        data_dir=cfg.dataset.data_dir,
        dataset_type=cfg.dataset.dataset_name,
        train_batch_size=cfg.train.batch_size,
        valid_batch_size=cfg.validation.batch_size,
        augment=(cfg.dataset.dataset_name != 'mnist'),  # no augmentation for MNIST
        random_seed=cfg.dataset.seed,
        valid_size=cfg.train.valid_part,
        shuffle=True,
        show_sample=False,
        num_workers=multiprocessing.cpu_count(),
        pin_memory=False)
def main(config):
    # ensure directories are setup
    prepare_dirs(config)

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    # instantiate data loaders
    if config.is_train:
        data_loader = get_train_valid_loader(config.data_dir,
                                             config.batch_size,
                                             config.random_seed,
                                             config.valid_size,
                                             config.shuffle,
                                             config.show_sample,
                                             **kwargs)
    else:
        data_loader = get_test_loader(config.data_dir,
                                      config.batch_size,
                                      **kwargs)

    # for data, target in data_loader:
    #     print(data.size())
    #     print(target.size())
    #     break
    #
    # inputs, classes = next(iter(data_loader))
    # print(data_loader)
    # out = torchvision.utils.make_grid(inputs)
    # class_names = np.arange(0, 10)
    # imshow(out, title=[class_names[x] for x in classes])

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # either train
    if config.is_train:
        save_config(config)
        trainer.train()
    # or load a pretrained model and test
    else:
        trainer.test()
def _train_one_epoch(self, model, num_passes, reg_layers):
    """
    Train the model for 1 epoch of the training set.

    An epoch corresponds to one full pass through the entire
    training set in successive mini-batches.

    If `num_passes` is not None, the model is trained for
    `num_passes` mini-batch iterations.
    """
    model.train()

    # setup optimizer
    optim = self._get_optimizer(model)

    # setup train loader
    if self.data_loader is None:
        space = self.optim_params['batch_size']
        batch_size = sample_from(space)
        self.data_loader = get_train_valid_loader(
            self.data_dir, self.args.name, batch_size,
            self.args.valid_size, self.args.shuffle, **self.kwargs)

    train_loader = self.data_loader[0]
    num_train = len(train_loader.sampler.indices)

    for i, (x, y) in enumerate(train_loader):
        if num_passes is not None:
            if i > num_passes:
                return
        if self.num_gpu > 0:
            x, y = x.cuda(), y.cuda()
        batch_size = x.shape[0]
        x = x.view(batch_size, -1)
        x, y = Variable(x), Variable(y)

        optim.zero_grad()
        output = model(x)
        loss = F.nll_loss(output, y)
        reg_loss = self._get_reg_loss(model, reg_layers)
        comp_loss = loss + reg_loss
        comp_loss.backward()
        optim.step()
def main():
    wandb.init("AVA")
    config, unparsed = get_config()

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)

    # instantiate data loaders
    if config.is_train:
        data_loader = get_train_valid_loader(task='MNIST',
                                             batch_size=config.batch_size,
                                             random_seed=config.random_seed,
                                             valid_size=config.valid_size)
    else:
        data_loader = get_test_loader(task='MNIST',
                                      batch_size=config.batch_size)

    wandb.config.update(config)

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # train
    trainer.train()
def main(config):
    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    data_loader = get_train_valid_loader(
        config.data_dir,
        config.random_seed,
        config.batch_size,
        config.valid_size,
        config.shuffle,
        config.semi,
        config.pc_size,
        config.binary,
        config.cat,
        **kwargs
    )

    trainer = Trainer(config, data_loader)
    save_config(config)
    trainer.train()
    trainer.test()
optimizer = optim.SGD(model.classifier.parameters(), lr=0.001,
                      momentum=opt.momentum)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

save_model_file_name = model_name + '_ff_'

train_loader, valid_loader, train_size, valid_size, \
    classes = get_train_valid_loader(train_data_dir,
                                     train_batch_size=opt.batch_size,
                                     val_batch_size=opt.val_batch_size,
                                     train_transform=train_transform,
                                     valid_transform=valid_transform,
                                     random_seed=opt.seed,
                                     valid_size=opt.split,
                                     shuffle=True,
                                     num_workers=opt.threads,
                                     pin_memory=True)

data_iter = iter(train_loader)
images, labels = next(data_iter)  # `data_iter.next()` is Python 2 only
print('no of classes: {}'.format(len(classes)))

model, result_logs[model_name] = train_model(
    model, train_loader, valid_loader, train_size, valid_size,
def main(config):
    # ensure directories are setup
    prepare_dirs(config)

    # create Omniglot data loaders
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    if config.is_train:
        data_loader = get_train_valid_loader(
            config.data_dir, config.batch_size,
            config.num_train, config.augment,
            config.way, config.valid_trials,
            config.shuffle, config.random_seed,
            **kwargs
        )
    else:
        data_loader = get_test_loader(
            config.data_dir, config.way,
            config.test_trials, config.random_seed,
            **kwargs
        )

    # sample 3 layer-wise hyperparams (lr, momentum, l2 reg)
    # if training for the first time
    if config.is_train and not config.resume:
        print("[*] Sampling layer hyperparameters.")

        layer_hyperparams = {
            'layer_init_lrs': [],
            'layer_end_momentums': [],
            'layer_l2_regs': []
        }
        for i in range(6):
            # sample
            lr = random.uniform(1e-4, 1e-1)
            mom = random.uniform(0, 1)
            reg = random.uniform(0, 0.1)

            # store
            layer_hyperparams['layer_init_lrs'].append(lr)
            layer_hyperparams['layer_end_momentums'].append(mom)
            layer_hyperparams['layer_l2_regs'].append(reg)
        try:
            save_config(config, layer_hyperparams)
        except ValueError:
            print(
                "[!] Samples already exist. Either change the model number,",
                "or delete the json file and rerun.",
                sep=' '
            )
            return
    # else load them from the config file
    else:
        try:
            layer_hyperparams = load_config(config)
        except FileNotFoundError:
            print("[!] No previously saved config. Set resume to False.")
            return

    trainer = Trainer(config, data_loader, layer_hyperparams)

    if config.is_train:
        trainer.train()
    else:
        trainer.test()
                    metavar='LR', help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

kwargs = {'num_workers': 4, 'pin_memory': True} if args.cuda else {}
train_loader, valid_loader = data_loader.get_train_valid_loader(
    'data', args.batch_size, False, 0, shuffle=False)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.batch_size, shuffle=True, **kwargs)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
add_blocks(1, 40, 40, 1)
add_blocks(2, 40, 40, 1)
add_blocks(3, 40, 20, 2)

# Initialize block_probs to an even distribution (1/n)
block_probs = [[] for i in range(n_blocks)]
for i in range(n_blocks):
    block_probs[i] = [(1 / len(blocks[i])) for j in range(len(blocks[i]))]

# Initialize block_weights to all start at 0 (used to be 1/n)
block_weights = [[] for i in range(n_blocks)]
for i in range(n_blocks):
    block_weights[i] = [0 for j in range(len(blocks[i]))]

train_loader, valid_loader = custom_dl.get_train_valid_loader(
    data_dir=root + '/data/cifar10/', batch_size=16,
    augment=False, random_seed=1)
test_loader = custom_dl.get_test_loader(
    data_dir=root + '/data/cifar10/', batch_size=16)


def save_grad(module, gradInput, gradOutput):
    module.block_grad = gradInput[0]


def save_forward(module, forwardInput, output):
    module.block_forward = output[0]


def train_layer(data, i, blocks):
    net_blocks = []
    # For each block layer, choose a random block to train
    for block_layer in blocks:
        block_i = np.random.choice(len(block_layer))
        block = block_layer[block_i]
def main():
    # Load the meta data file
    df = pd.read_csv('./data/train.csv')
    df, label_encoder = utility.encode_labels(df)
    num_classes = len(df['label'].value_counts())
    np.save('./data/label_encoder_classes.npy', label_encoder.classes_)

    for image_size in IMAGE_SIZES:  # train at every resolution
        # Generate the ZCA matrix if enabled
        if APPLY_ZCA_TRANS:
            print("Making ZCA matrix ...")
            data_loader = dl.get_full_data_loader(df, data_dir=DATA_DIR,
                                                  batch_size=BATCH_SIZE,
                                                  image_size=image_size)
            train_dataset_arr = next(iter(data_loader))[0].numpy()
            zca = utility.ZCA()
            zca.fit(train_dataset_arr)
            zca_dic = {"zca_matrix": zca.ZCA_mat, "zca_mean": zca.mean}
            savemat("./data/zca_data.mat", zca_dic)
            print("Completed making ZCA matrix")

        # Define normalization
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )

        # Define specific transforms
        train_transform = transforms.Compose([
            utility.AddPadding(),
            transforms.Resize((image_size, image_size)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=(-90, 90)),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.ColorJitter(.4, .4, .4),
            transforms.ToTensor(),
            normalize
        ])
        valid_transform = transforms.Compose([
            utility.AddPadding(),
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            normalize
        ])

        # Create a train and valid dataset
        train_dataset = dl.HotelImagesDataset(df, root_dir=DATA_DIR,
                                              transform=train_transform)
        valid_dataset = dl.HotelImagesDataset(df, root_dir=DATA_DIR,
                                              transform=valid_transform)

        # Get a train and valid data loader
        train_loader, valid_loader = dl.get_train_valid_loader(
            train_dataset, valid_dataset,
            batch_size=BATCH_SIZE, random_seed=0)

        for net_type in NETS:  # train every network type
            model = utility.initialize_net(num_classes, net_type,
                                           feature_extract=FEATURE_EXTRACT)

            # If an old model exists, resume from its state
            if path.exists(f"./models/model_{net_type}.pt"):
                print("Resuming training on trained model ...")
                model = utility.load_latest_model(
                    model, f'./models/model_{net_type}.pt')

            # Gather the parameters to be optimized/updated in this run
            params_to_update = utility.get_model_params_to_train(
                model, FEATURE_EXTRACT)

            # Send model to GPU
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")
            model = model.to(device)

            # Make criterion
            criterion = nn.CrossEntropyLoss()

            # Make optimizer + scheduler
            optimizer = torch.optim.SGD(params_to_update,
                                        lr=0.01, momentum=0.9)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.01, patience=3)

            trained_model = trainer.train_model(
                device=device,
                model=model,
                optimizer=optimizer,
                criterion=criterion,
                train_loader=train_loader,
                valid_loader=valid_loader,
                scheduler=scheduler,
                net_type=net_type,
                epochs=EPOCHS,
                apply_zca_trans=APPLY_ZCA_TRANS)

            utility.save_current_model(trained_model,
                                       f"./models/model_{net_type}.pt")
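# A minimal sketch (an assumption; the repo's `utility.ZCA` may differ) of a
# ZCA whitening fit exposing the `ZCA_mat` / `mean` attributes saved above.
# Whitening matrix: W = U diag(1/sqrt(S + eps)) U^T from the SVD of the
# covariance of the flattened, mean-centered training images. Note the D x D
# covariance makes this practical only for modest image sizes.
import numpy as np


class ZCA:
    def __init__(self, eps=1e-5):
        self.eps = eps
        self.mean = None
        self.ZCA_mat = None

    def fit(self, X):
        # flatten each image to a row vector and center the data
        X = X.reshape(X.shape[0], -1).astype(np.float64)
        self.mean = X.mean(axis=0)
        Xc = X - self.mean
        cov = np.dot(Xc.T, Xc) / Xc.shape[0]
        U, S, _ = np.linalg.svd(cov)
        self.ZCA_mat = U @ np.diag(1.0 / np.sqrt(S + self.eps)) @ U.T
        return self

    def transform(self, X):
        shape = X.shape
        X = X.reshape(shape[0], -1) - self.mean
        return (X @ self.ZCA_mat).reshape(shape)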
def main(config):
    # ensure directories are setup
    prepare_dirs(config)

    # ensure reproducibility
    torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        torch.cuda.manual_seed(config.random_seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}

    # instantiate data loaders
    if config.dataset == 'mnist':
        if config.is_train:
            data_loader = get_train_valid_loader(config.data_dir,
                                                 config.batch_size,
                                                 config.random_seed,
                                                 config.valid_size,
                                                 config.shuffle,
                                                 config.show_sample,
                                                 **kwargs)
        else:
            data_loader = get_test_loader(config.data_dir,
                                          config.batch_size,
                                          **kwargs)
    elif config.dataset == 'celeba':
        if config.is_train:
            trainer, validator, _ = get_train_celeba_loader(
                config.celeba_image_dir, config.attr_path,
                config.selected_attrs, config.celeba_crop_size,
                config.image_size, config.batch_size,
                config.mode, config.num_workers, 0.1,
                config.show_sample)
            data_loader = (trainer, validator)
        else:
            _, _, data_loader = get_train_celeba_loader(
                config.celeba_image_dir, config.attr_path,
                config.selected_attrs, config.celeba_crop_size,
                config.image_size, config.batch_size,
                config.mode, config.num_workers, 0.1,
                config.show_sample)
    elif config.dataset == 'celebhq':
        if config.is_train:
            trainer, validator, _ = get_train_celebhq_loader(
                config.celebhq_image_dir, config.hq_attr_path,
                config.selected_attrs, config.celeba_crop_size,
                config.image_size, config.batch_size,
                config.mode, config.num_workers, 0.1,
                config.show_sample)
            data_loader = (trainer, validator)
        else:
            _, _, data_loader = get_train_celebhq_loader(
                config.celebhq_image_dir, config.hq_attr_path,
                config.selected_attrs, config.celeba_crop_size,
                config.image_size, config.batch_size,
                config.mode, config.num_workers, 0.1,
                config.show_sample)

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # either train
    if config.is_train:
        save_config(config)
        trainer.train()
    # or load a pretrained model and test
    else:
        trainer.test()
def run_config(self, model, num_iters):
    """
    Train a particular hyperparameter configuration for a given
    number of iterations and evaluate the loss on the validation set.

    For hyperparameters that have previously been evaluated, resume
    from a previous checkpoint.

    Args
    ----
    - model: the mutated model to train.
    - num_iters: an int indicating the number of iterations
      to train the model for.

    Returns
    -------
    - val_loss: the lowest validation loss achieved.
    """
    # load the most recent checkpoint if it exists
    try:
        ckpt = self._load_checkpoint(model.ckpt_name)
        model.load_state_dict(ckpt['state_dict'])
    except FileNotFoundError:
        pass

    if self.num_gpu > 0:
        model = model.cuda()

    # parse reg params
    reg_layers = self._add_reg(model)

    # setup train loader
    if self.data_loader is None:
        self.batch_hyper = True
        space = self.optim_params['batch_size']
        batch_size = sample_from(space)
        tqdm.write("batch size: {}".format(batch_size))
        self.data_loader = get_train_valid_loader(
            self.data_dir, self.args.name, batch_size,
            self.args.valid_size, self.args.shuffle, **self.kwargs)

    # training logic
    min_val_loss = 999999
    counter = 0
    num_epochs = int(num_iters) if self.epoch_scale else 1
    num_passes = None if self.epoch_scale else num_iters
    for epoch in range(num_epochs):
        self._train_one_epoch(model, num_passes, reg_layers)
        val_loss = self._validate_one_epoch(model)
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            counter = 0
        else:
            counter += 1
        # early stopping
        if counter > self.patience:
            return 999999

    if self.batch_hyper:
        self.data_loader = None

    state = {
        'state_dict': model.state_dict(),
        'min_val_loss': min_val_loss,
    }
    self._save_checkpoint(state, model.ckpt_name)

    return min_val_loss
def find_lr():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Build dataset
    train_loader, _, _, n_classes = dl.get_train_valid_loader(
        data_dir='data/train_images',
        meta_data_file='data/train.csv',
        batch_size=BATCH_SIZE,
        augment=True,
        random_seed=0)

    # Make resnet
    model = utility.initialize_net(n_classes, 'resnet18',
                                   feature_extract=USE_FEATURE_EXTRACT)
    model = model.to(device)

    params_to_update = model.parameters()
    if USE_FEATURE_EXTRACT:
        params_to_update = []
        for name, param in model.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)

    # exponential schedule: after len(train_loader) - 1 steps the rate
    # has grown from init_value to final_value
    init_value = 1e-8
    final_value = 100.0
    number_in_epoch = len(train_loader) - 1
    update_step = (final_value / init_value) ** (1 / number_in_epoch)
    lr = init_value

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params_to_update, lr=lr)
    optimizer.param_groups[0]["lr"] = lr

    best_loss = 0.0
    batch_num = 0
    losses = []
    log_lrs = []
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        batch_num += 1

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Stop early if the loss explodes
        if batch_num > 1 and loss > 4 * best_loss:
            print("Loss exploded")
            return log_lrs[10:-5], losses[10:-5]

        # Record the best loss
        if loss < best_loss or batch_num == 1:
            best_loss = loss

        # Store the values (detached via .item(), so the autograd
        # graph of every batch is not kept alive)
        losses.append(loss.item())
        log_lrs.append(lr)

        # Do the backward pass and optimize
        loss.backward()
        optimizer.step()

        # Update the lr for the next step and store
        lr *= update_step
        optimizer.param_groups[0]["lr"] = lr

    return log_lrs[10:-5], losses[10:-5]
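# Hypothetical usage of find_lr() above: plot loss against learning rate on a
# log axis (the rate grows geometrically) and pick a value on the steepest
# descending slope. Despite its name, the first return value holds raw
# learning rates, not logs. Assumes matplotlib is installed.
import matplotlib.pyplot as plt

lrs, losses = find_lr()
plt.plot(lrs, losses)
plt.xscale('log')
plt.xlabel('learning rate')
plt.ylabel('training loss')
plt.show()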
def main(data_dir):
    siamese_model = Siamese()
    batch_size = 4
    num_train = 30000
    augment = True
    way = 20
    trials = 300
    epochs = 50

    train_loader, val_loader = get_train_valid_loader(
        data_dir, batch_size, num_train,
        augment, way, trials, pin_memory=True)

    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(siamese_model.parameters(),
                                lr=1e-3, momentum=0.9)
    lambda1 = lambda epoch: 0.99 ** epoch
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda1)
    writer = SummaryWriter()
    siamese_model.cuda()

    best_accuracy = 0.0
    for i in range(epochs):
        siamese_model.train()
        avg_train_loss = 0.0
        for it, (img_1, img_2, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            img_1 = img_1.cuda()
            img_2 = img_2.cuda()
            labels = labels.cuda()
            preds = siamese_model(img_1, img_2)
            loss = criterion(preds, labels)
            avg_train_loss += loss.item()
            writer.add_scalar('Loss_train', loss.item(),
                              len(train_loader) * i + it)
            loss.backward()
            optimizer.step()

        # validation: each trial is a `way`-way one-shot task in which
        # index 0 holds the true match
        siamese_model.eval()
        count = 0
        with torch.no_grad():
            for ref_images, candidates in val_loader:
                ref_images = ref_images.cuda()
                candidates = candidates.cuda()
                preds = siamese_model(ref_images, candidates)
                if torch.argmax(preds) == 0:
                    count += 1
        if count / len(val_loader) > best_accuracy:
            best_accuracy = count / len(val_loader)
            torch.save(siamese_model.state_dict(), 'best_model.pth')
        writer.add_scalar('Accuracy_validation', count / trials, i)
        print('Epoch {} | Train loss {} | Val accuracy {}'.format(
            i, avg_train_loss / len(train_loader), count / trials))
        # scheduler.step()
    writer.flush()

    best_model = Siamese().cuda()
    best_model.load_state_dict(torch.load('best_model.pth'))
    best_model.eval()
    trials = 400
    test_loader = get_test_loader(data_dir, way, trials)
    test_count = 0
    with torch.no_grad():
        for ref_images, candidates in test_loader:
            ref_images = ref_images.cuda()
            candidates = candidates.cuda()
            preds = best_model(ref_images, candidates)
            if torch.argmax(preds) == 0:
                test_count += 1
    print('Test Accuracy {}'.format(test_count / len(test_loader)))
def train_(model_=model.Net()):
    train_loader, valid_loader = data_loader.get_train_valid_loader(
        data_dir='./data', batch_size=16, augment=False,
        random_seed=1230, show_sample=False,
        num_workers=6, pin_memory=False)
    cudnn.benchmark = True

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Using Device: %s' % device)

    net = model_
    net = net.cuda()
    optimizer, criterion = optimizer_(net)

    start_time = time.time()
    total_epoch = 3
    for epoch in tqdm(range(total_epoch)):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            # get the inputs
            inputs, labels = data
            inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 1000 == 999:  # print every 1000 mini-batches
                correct = 0
                total = 0
                with torch.no_grad():
                    valid_running_loss = 0
                    for data in valid_loader:
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        valid_loss = criterion(outputs, labels)
                        valid_running_loss += valid_loss.item()
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                print('Validation Set, loss = %.3f, Acc.: %.2f' %
                      (valid_running_loss / len(valid_loader), correct / total))
                running_loss = 0.0
                valid_running_loss = 0
    end_time = time.time()

    print('Finished Training')
    # note: "{:d}" would raise on a float duration, so format as "{:.0f}"
    print("Duration: {:.0f}s".format(end_time - start_time))
    return net
def train_model():
    # Init device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize a new wandb run
    wandb.init()

    # Config holds and saves hyperparameters and inputs
    config = wandb.config

    # Load the meta data file
    df = pd.read_csv('data/train.csv')
    df = df.drop(['timestamp'], axis=1)
    df, _ = utility.encode_labels(df)
    num_classes = len(df['label'].value_counts())

    # Build the dataset
    train_loader, valid_loader = dl.get_train_valid_loader(
        df,
        data_dir='data/train_images',
        batch_size=config.batch_size,
        image_size=IMAGE_SIZE,
        augment=True,
        random_seed=0)

    # Make resnet
    model = utility.initialize_net(num_classes, config.resnet_type,
                                   config.use_feature_extract)
    model = model.to(device)

    # Gather the parameters to be optimized/updated in this run
    params_to_update = utility.get_model_params_to_train(
        model, config.use_feature_extract)

    # Define criterion + optimizer
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == 'sgd':
        optimizer = optim.SGD(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'adam':
        optimizer = optim.Adam(params_to_update, lr=config.learning_rate)

    # Define scheduler
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        max_lr=10,
        epochs=config.epochs,
        anneal_strategy=config.scheduler,
        steps_per_epoch=len(train_loader))

    trainer.train_model(device=device,
                        model=model,
                        optimizer=optimizer,
                        criterion=criterion,
                        train_loader=train_loader,
                        valid_loader=valid_loader,
                        scheduler=scheduler,
                        epochs=config.epochs,
                        send_to_wandb=True)
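# Hypothetical sweep setup for the wandb run above: on each call to
# train_model(), the sweep fills `wandb.config` with the keys read there
# (batch_size, resnet_type, optimizer, learning_rate, scheduler, epochs,
# use_feature_extract). The value ranges and the metric name are examples,
# not values from this repo.
import wandb

sweep_config = {
    'method': 'random',
    'metric': {'name': 'valid_loss', 'goal': 'minimize'},
    'parameters': {
        'batch_size': {'values': [16, 32, 64]},
        'resnet_type': {'values': ['resnet18', 'resnet50']},
        'optimizer': {'values': ['sgd', 'rmsprop', 'adam']},
        'learning_rate': {'min': 0.0001, 'max': 0.1},
        'scheduler': {'values': ['cos', 'linear']},
        'epochs': {'value': 10},
        'use_feature_extract': {'value': True},
    },
}
sweep_id = wandb.sweep(sweep_config)
wandb.agent(sweep_id, function=train_model)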
    pin_memory = True
else:
    use_gpu = 0
    pin_memory = False

writer = SummaryWriter()

##
########################################################################################################
#%% Setup Dataset and DataLoaders:
train_loader, valid_loader = get_train_valid_loader(
    data_dir, batch_size, augment=1, random_seed=1,
    valid_size=0.1, shuffle=True, show_sample=False,
    num_workers=n_threads, pin_memory=pin_memory)

# the get_train_valid_loader above is a high-level wrapper for the code below.
# It handles random shuffling, the train/val split, and data augmentation.

##
########################################################################################################
#%% Setup Model
#
#Net = models.vgg16(pretrained=True)
# Q: how can I later delete the last 1-2 layers and use my own fully connected layer?
                    help='save path')
parser.add_argument('--load', type=str, default='none',
                    help='load path')
parser.add_argument('--epochs', type=int, default=750, metavar='N',
                    help='number of epochs to train (default: 750)')
parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                    help='learning rate (default: 0.1)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                    help='SGD momentum (default: 0.9)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

kwargs = {'num_workers': 4, 'pin_memory': True} if args.cuda else {}
train_loader, valid_loader = data_loader.get_train_valid_loader(
    './data', 'CIFAR100', args.batch_size, True, 0, shuffle=False)
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR100('./data', train=False,
                      transform=transforms.Compose([
                          transforms.ToTensor(),
                          transforms.Normalize((0.4914, 0.4822, 0.4465),
                                               (0.2023, 0.1994, 0.2010))
                      ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)

model = resnet.resnet20_cifar(num_classes=100, sig=True)
if args.cuda:
    model.cuda()
if args.load != 'none':
    state_dict = torch.load(args.load)
    model.load_state_dict(state_dict)
def main(args):
    # Set CUDA GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = \
        ','.join(str(gpu) for gpu in args.visible_gpus)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Set network
    net = config_net(args.net_name)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    # Set loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # optimizer = optim.SGD(
    #     net.parameters(), lr=args.lr,
    #     momentum=0.9, weight_decay=5e-4)

    # Data loader
    print("Constructing data loader ...")
    data_dir = args.data_dir
    train_loader, valid_loader = data_loader.get_train_valid_loader(
        data_dir=data_dir, batch_size=128, augment=True,
        random_seed=20, valid_size=0.1, shuffle=True,
        num_workers=4, pin_memory=True)

    # Training processes
    epochs = args.epochs
    history = {
        "acc": [0.0] * epochs,
        "loss": [0.0] * epochs,
        "val_acc": [0.0] * epochs,
        "val_loss": [0.0] * epochs
    }
    ckpt_dir = os.path.join(args.ckpt_dir, args.net_name)
    _save_makedirs(ckpt_dir)

    best_val_acc = 0.0
    for epoch in range(epochs):
        epoch_start_time = time.time()
        train_loss = 0
        correct = 0
        total = 0

        # Training
        net.train()
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            utils.progress_bar(
                batch_idx, len(train_loader),
                "Loss: %.3f | Acc: %.3f%% (%d/%d)"
                % (train_loss / (batch_idx + 1),
                   100. * correct / total, correct, total))

        # Validation
        val_loss = 0
        val_correct = 0
        val_total = 0
        net.eval()
        for batch_idx, (inputs, targets) in enumerate(valid_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += targets.size(0)
            val_correct += predicted.eq(targets).sum().item()

        train_acc = 100. * correct / total
        train_loss /= len(train_loader)
        val_acc = 100. * val_correct / val_total
        val_loss /= len(valid_loader)
        print("[%03d/%03d] %2.2f sec(s)"
              " Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f"
              % (epoch + 1, epochs, time.time() - epoch_start_time,
                 train_acc, train_loss, val_acc, val_loss))

        history["acc"][epoch] = train_acc
        history["loss"][epoch] = train_loss
        history["val_acc"][epoch] = val_acc
        history["val_loss"][epoch] = val_loss

        # Save model checkpoints
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            state = {
                'net': net.state_dict(),
                'acc': train_acc,
                'val_acc': val_acc,
                'epoch': epoch,
            }
            torch.save(
                state,
                os.path.join(ckpt_dir, "model_{:03}.pth".format(epoch + 1)))

    # Save training history
    with open(os.path.join(ckpt_dir, "history.json"), 'w') as opf:
        json.dump(history, opf)
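# Hypothetical follow-up: read back the history.json written above and plot
# the accuracy curves. Assumes matplotlib; the directory path below is an
# example stand-in for whatever `ckpt_dir` resolved to during training.
import json
import os

import matplotlib.pyplot as plt

with open(os.path.join("checkpoints", "mynet", "history.json")) as opf:
    history = json.load(opf)

plt.plot(history["acc"], label="train acc")
plt.plot(history["val_acc"], label="val acc")
plt.xlabel("epoch")
plt.ylabel("accuracy (%)")
plt.legend()
plt.show()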