def train(dataloader, parameters, device):
    model = AutoEncoder(input_dim=1900,
                        nlayers=parameters.get('nlayers', 5),
                        latent=100)
    model = model.to(device)
    model.train()
    train_loss = 0
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=parameters.get('lr', 1e-5),
                                 weight_decay=parameters.get('weight_decay', 0.))
    loss_func = torch.nn.MSELoss()
    for epoch in range(parameters.get('epochs', 1000)):
        for index, (data, ) in enumerate(dataloader, 1):
            data = data.to(device)  # keep the batch on the same device as the model
            optimizer.zero_grad()
            output = model(data)
            loss = loss_func(output, data)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
    return model
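# A minimal usage sketch for the training function above, assuming the
# 1900-dimensional inputs are available as a NumPy array `features`; the
# variable names, batch size, and parameter values here are illustrative
# assumptions, not part of the original code.
import torch
from torch.utils.data import DataLoader, TensorDataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'
dataset = TensorDataset(torch.tensor(features, dtype=torch.float32))
# TensorDataset yields 1-tuples, which is why the loop unpacks `(data, )`.
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
model = train(dataloader, {'lr': 1e-4, 'epochs': 100}, device)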
def run(data_obj, training_size):
    data_obj.split_dataset(training_size)
    data_obj.preprocess()

    # -------------------------------- Autoencoder model
    ae_model = AutoEncoder(data_obj.x_train_scaled.shape[1], training_size,
                           data_obj.name)
    ae_model.train(data_obj.x_train_scaled, data_obj.x_val_scaled)

    # -------------------------------- Encoded representation
    x_train_encoded, x_val_encoded, x_test_encoded = ae_model.encoded_data(
        data_obj.x_train_scaled, data_obj.x_val_scaled, data_obj.x_test_scaled)

    # -------------------------------- Neural Network model
    nn_model = NeuralNetwork(data_obj.x_train_scaled.shape[1],
                             data_obj.y_train.shape[1], training_size,
                             data_obj.name)
    nn_model.train(x_train_encoded, data_obj.y_train, x_val_encoded,
                   data_obj.y_val)
    nn_model.evaluate(x_test_encoded, data_obj.y_test)

    # -------------------------------- reset data from memory
    data_obj.reset_scalar()
    return nn_model.result()
def main():
    torch.manual_seed(1618)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Using PyTorch Device : {}'.format(device.upper()))

    n_epochs = 800
    LOGDIR = './runs/' + datetime.now().strftime('%Y%m%d_%H%M%S')
    logger = Logger(log_dir=LOGDIR)
    criterion = nn.MSELoss().to(device)
    lr = 1e-4

    model = AutoEncoder(connections=128).to(device)
    optim = torch.optim.Adam(itertools.chain(model.encoder.parameters(),
                                             model.decoder.parameters()),
                             lr=lr)
    # model = AE_3D_200().to(device)
    # optim = torch.optim.Adam(itertools.chain(model.encoder.parameters(),
    #                                          model.decoder.parameters()),
    #                          lr=lr, weight_decay=1e-6)

    train_batch, val_batch, test_batch = get_data_batches(device=device, frac=1.0)
    print(train_batch.size())
    print(val_batch.size())
    print(test_batch.size())

    worst_case_loss = torch.FloatTensor([float('Inf')]).to(device)

    pbar = tqdm(range(n_epochs), leave=True)
    for e in pbar:
        # Step-decay schedule: divide the learning rate by 5 every 100 epochs.
        new_lr = lr * (0.2**((e + 1) // 100))
        for param_group in optim.param_groups:
            param_group['lr'] = new_lr

        optim.zero_grad()
        recon_batch = model(train_batch)
        loss = criterion(recon_batch, train_batch)
        loss.backward()
        optim.step()

        # Evaluate on the validation and test batches without tracking gradients.
        model.eval()
        with torch.no_grad():
            recon_val = model(val_batch)
            val_loss = criterion(recon_val, val_batch)
            recon_test = model(test_batch)
            test_loss = criterion(recon_test, test_batch)
        model.train()

        info = {
            'train_loss': loss.item(),
            'val_loss': val_loss.item(),
            'test_loss': test_loss.item()
        }
        for tag, value in info.items():
            logger.scalar_summary(tag, value, e)

        torch.save(model.encoder.state_dict(),
                   LOGDIR + '/encoder_epoch_{}.pt'.format(e))
        torch.save(model.decoder.state_dict(),
                   LOGDIR + '/decoder_epoch_{}.pt'.format(e))

        pbar.set_description(
            'train_loss: {:.4f}, val_loss: {:.4f}, test_loss: {:.4f}'.format(
                loss.item(), val_loss.item(), test_loss.item()))
def train(output_filename, model_type, hidden_size, loss_type, norm_type,
          sigma_noise):
    train_data = torchvision.datasets.MNIST(
        root='datasets/mnist/',
        train=True,
        transform=torchvision.transforms.ToTensor(),
        download=False,
    )
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=BATCH_SIZE,
                                   shuffle=True)

    if loss_type == 'l2':
        loss_func = nn.MSELoss()
    elif loss_type == 'cross_entropy':
        loss_func = F.binary_cross_entropy

    if model_type == 'AE':
        model = AutoEncoder(hidden_size).cuda()
    elif model_type == 'LTAE':
        model = LatentAutoEncoder(hidden_size, norm_type,
                                  sigma=sigma_noise).cuda()
        model.set_device()
    elif model_type == 'VAE':
        model = VariationalAE(hidden_size).cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()

    for epoch in range(EPOCH):
        for step, (x, _) in enumerate(train_loader):
            optimizer.zero_grad()
            x_batch = x.view(-1, 28 * 28).cuda()
            y_batch = x.view(-1, 28 * 28).cuda()

            if model_type == 'AE':
                _, decoded = model(x_batch)
                loss = loss_func(decoded, y_batch)
            elif model_type == 'LTAE':
                _, latent, transformed, decoded = model(x_batch)
                loss = loss_func(decoded, y_batch)
                loss += torch.nn.functional.mse_loss(transformed, latent)
            elif model_type == 'VAE':
                decoded, mu, logvar = model(x_batch)
                loss = loss_func_vae(decoded, x_batch, mu, logvar, loss_type)

            loss.backward()
            optimizer.step()

        if epoch % 10 == 0:
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.detach().cpu())

    torch.save({'state_dict': model.state_dict()},
               f'./saved_models/{output_filename}')
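# `loss_func_vae` is not defined in this snippet. Below is a minimal sketch of
# what it likely computes, assuming the standard VAE objective (a reconstruction
# term plus the KL divergence of the approximate posterior from a unit Gaussian
# prior). The name and signature follow the call site above; the body is an
# assumption, not the original implementation.
import torch
import torch.nn.functional as F

def loss_func_vae(decoded, x, mu, logvar, loss_type):
    if loss_type == 'l2':
        recon = F.mse_loss(decoded, x, reduction='sum')
    else:
        recon = F.binary_cross_entropy(decoded, x, reduction='sum')
    # Closed-form KL(q(z|x) || N(0, I)) for a diagonal Gaussian posterior.
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon + kld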
def main(dataset, net_config, _run):
    # Add all of the config into the helper class
    for key in net_config:
        setattr(a, key, net_config[key])
    setattr(a, 'EXP_OUT', EXP_OUT)
    setattr(a, 'RUN_id', _run._id)

    output_dir = create_directories(_run._id, ex)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # load the dataset class
        data = get_dataset(dataset['name'])
        data = data(**dataset)

        model = AutoEncoder(sess,
                            image_size=a.input_image_size,
                            batch_size=a.batch_size,
                            output_size=a.input_image_size,
                            dataset_name=dataset['name'],
                            checkpoint_dir=output_dir,
                            data=data,
                            momentum=a.batch_momentum,
                            aef_dim=a.naef,
                            noise_std_dev=a.noise_std_dev)

        if a.mode == 'train':
            tmp = model.train(a)
            _run.info['predictions'] = tmp
            _run.info['mean_predictions'] = np.mean(tmp, axis=0)
        elif a.mode == 'valid':
            tmp = model.validate(a)
            _run.info['predictions'] = tmp
            _run.info['mean_predictions'] = np.mean(tmp, axis=0)
        else:
            model.test(a)
def run(args):
    # Create AutoEncoder
    autoencoder = AutoEncoder(args['input_shape'],
                              args['z_dim'],
                              args['c_dim'],
                              learning_rate=args['learning_rate'])

    # train
    autoencoder.train(args['train_dir'], args['val_dir'], args['epochs'],
                      args['batch_size'], args['output_dir'])

    # plot
    x = autoencoder.sample_data()
    plot_original(x, save_dir=args['output_dir'])
    plot_reconstruction(x, autoencoder, save_dir=args['output_dir'])
    plot_zvariation(x, autoencoder, save_dir=args['output_dir'])
    plot_cvariation(x, autoencoder, save_dir=args['output_dir'])
    plot_zsemireconstructed(x, autoencoder, save_dir=args['output_dir'])
    plot_csemireconstructed(x, autoencoder, save_dir=args['output_dir'])
def train_autoencoder(train_matrix, test_set):
    num_users, num_items = train_matrix.shape
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        './data/Foursquare/place_correlation_gamma60.npz')
    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)

    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items,
                        args.inner_layers,
                        num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()

    # Element-wise MSE (no reduction); the confidence weights are applied below.
    criterion = torch.nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    batch_size = args.batch_size
    user_indexes = np.arange(num_users)

    model.train()
    for t in range(args.epoch):
        print("epoch:{}".format(t))
        np.random.shuffle(user_indexes)
        avg_cost = 0.
        for batchID in range(int(num_users / batch_size)):
            start = batchID * batch_size
            end = start + batch_size
            batch_user_index = user_indexes[start:end]

            batch_x, batch_x_weight, batch_item_index = get_mini_batch(
                train_matrix, weight_matrix, batch_user_index)
            batch_x_weight += 1
            batch_x = Variable(torch.from_numpy(batch_x).type(T.FloatTensor),
                               requires_grad=False)

            y_pred = model(batch_item_index, place_correlation)

            # Compute and print loss
            batch_x_weight = Variable(
                torch.from_numpy(batch_x_weight).type(T.FloatTensor),
                requires_grad=False)
            loss = (batch_x_weight * criterion(y_pred, batch_x)).sum() / batch_size
            print(batchID, loss.data)

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_cost += loss / num_users * batch_size

        print("Avg loss:{}".format(avg_cost))

    # print the prediction score for the user 0
    print(model([train_matrix.getrow(0).indices], place_correlation)
          [:, T.LongTensor(train_matrix.getrow(0).indices.astype(np.int32))])
    print(model([train_matrix.getrow(0).indices], place_correlation))

    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]

        # Mask out items the user has already visited before ranking.
        pred_rating_vector[user_rating_vector > 0] = 0

        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score

        sorted_item = heapq.nlargest(topk, item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)

        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" %
              (user_id,
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:5], 5),
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:topk], topk)))

    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))

    print(precision)
    print(recall)
    print(MAP)
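# `get_mini_batch` is not shown in this snippet. A minimal sketch of what it
# plausibly returns, inferred only from how its outputs are used above: dense
# interaction rows, the matching confidence weights, and the visited-item
# indices per user. The exact implementation (and whether `weight_matrix` is a
# SciPy sparse matrix) is an assumption.
def get_mini_batch(train_matrix, weight_matrix, batch_user_index):
    batch_x = train_matrix[batch_user_index].toarray()          # binary visit vectors
    batch_x_weight = weight_matrix[batch_user_index].toarray()  # confidence weights
    batch_item_index = [train_matrix.getrow(u).indices
                        for u in batch_user_index]              # visited item ids per user
    return batch_x, batch_x_weight, batch_item_index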
class Trainer(object):
    def __init__(self, arguments):
        self.arguments = arguments
        input_dims = 28, 28, 1
        self.ae = AutoEncoder(
            input_dims=input_dims,
            encoder_filters=arguments.encoder_filters,
            encoder_conv_kernels=arguments.encoder_conv_kernels,
            encoder_conv_strides=arguments.encoder_conv_strides,
            decoder_filters=arguments.decoder_filters,
            decoder_conv_kernels=arguments.decoder_conv_kernels,
            decoder_conv_strides=arguments.decoder_conv_strides,
            latent_dim=arguments.latent_dim,
            use_batch_norm=arguments.use_batch_norm,
            use_dropout=arguments.use_dropout).to(device)
        self.ae.train()

        self.criterion = nn.MSELoss()
        self.encoder_optimizer = opt.Adam(params=self.ae.encoder.parameters(),
                                          lr=arguments.learning_rate)
        self.decoder_optimizer = opt.Adam(params=self.ae.decoder.parameters(),
                                          lr=arguments.learning_rate)

        self.writer = SummaryWriter(
            logdir=os.path.join(self.arguments.log_dir, self.arguments.data),
            comment='epoch_{0:03d}_batch_size_{1:03d}_lr_{2:.03f}'.format(
                self.arguments.epochs - 1, self.arguments.batch_size,
                self.arguments.learning_rate))

    def train(self):
        step = 0
        for epoch in range(self.arguments.epochs):
            for data in self.train_dataloader():
                x, _ = data
                x = x.to(device)

                _, out = self.ae(x)

                # Optimizer & Backward
                self.encoder_optimizer.zero_grad()
                self.decoder_optimizer.zero_grad()
                loss = self.criterion(input=out, target=x)
                loss.backward()
                self.encoder_optimizer.step()
                self.decoder_optimizer.step()

                # Print console & TensorBoard logs
                if step % self.arguments.print_step_point == 0:
                    print('[Epoch] : {0:03d} [Step] : {1:06d} [Loss]: {2:.05f}'
                          .format(epoch, step, loss.item()))
                    self.writer.add_scalar('loss', loss.item(), step)

                # Save the model
                if step % self.arguments.save_step_point == 0:
                    ckpt_dir = os.path.join(
                        self.arguments.ckpt_dir, self.arguments.data,
                        'step_{0:05d}_batch_size_{1:03d}_lr_{2:.05f}.pth'.format(
                            step, self.arguments.batch_size,
                            self.arguments.learning_rate))
                    model_save(model=self.ae,
                               encoder_optimizer=self.encoder_optimizer,
                               decoder_optimizer=self.decoder_optimizer,
                               loss=loss.item(),
                               latent_dim=self.arguments.latent_dim,
                               ckpt_dir=ckpt_dir)
                    print('save model \t => ', ckpt_dir)

                step += 1

        # Save the final checkpoint after training
        ckpt_dir = os.path.join(
            self.arguments.ckpt_dir, self.arguments.data,
            'step_{0:05d}_batch_size_{1:03d}_lr_{2:.05f}.pth'.format(
                step, self.arguments.batch_size, self.arguments.learning_rate))
        model_save(model=self.ae,
                   encoder_optimizer=self.encoder_optimizer,
                   decoder_optimizer=self.decoder_optimizer,
                   loss=loss.item(),
                   latent_dim=self.arguments.latent_dim,
                   ckpt_dir=ckpt_dir)
        print('save model \t => ', ckpt_dir)

    def train_dataloader(self):
        if self.arguments.data == 'mnist':
            dataloader = mnist_train_dataloader(
                data_dir=os.path.join(self.arguments.data_dir,
                                      self.arguments.data),
                batch_size=self.arguments.batch_size)
            return dataloader

    def get_input_dims(self):
        if self.arguments.data == 'mnist':
            return 28, 28, 1
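# `model_save` is not defined in this snippet. A minimal sketch of what it
# might do, assuming it bundles the model and both optimizer states into a
# single checkpoint file at `ckpt_dir`; the key names are assumptions derived
# from the call sites above.
import os
import torch

def model_save(model, encoder_optimizer, decoder_optimizer, loss, latent_dim,
               ckpt_dir):
    os.makedirs(os.path.dirname(ckpt_dir), exist_ok=True)
    torch.save({
        'model': model.state_dict(),
        'encoder_optimizer': encoder_optimizer.state_dict(),
        'decoder_optimizer': decoder_optimizer.state_dict(),
        'loss': loss,
        'latent_dim': latent_dim,
    }, ckpt_dir)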
    network.load_state_dict(torch.load(args.model))
else:
    print('Begin training new model.')

network.to(DEVICE)
optimizer = optim.Adam(network.parameters(), lr=args.lr,
                       weight_decay=args.weight_decay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.7)

max_iter = int(len(train_dataset) / args.batch_size + 0.5)
minimum_loss = 1e4
best_epoch = 0

for epoch in range(1, args.epochs + 1):
    # training
    network.train()
    total_loss, iter_count = 0, 0
    for i, data in enumerate(train_dataloader, 1):
        partial_input, coarse_gt, dense_gt = data
        partial_input = partial_input.to(DEVICE)
        coarse_gt = coarse_gt.to(DEVICE)
        dense_gt = dense_gt.to(DEVICE)
        partial_input = partial_input.permute(0, 2, 1)

        optimizer.zero_grad()
        v, y_coarse, y_detail = network(partial_input)
        y_coarse = y_coarse.permute(0, 2, 1)
        y_detail = y_detail.permute(0, 2, 1)
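        # The snippet ends before the loss is computed. A minimal sketch of how
        # the iteration might continue, assuming a Chamfer-distance loss on both
        # the coarse and dense reconstructions (PCN-style training). The
        # `chamfer_distance` helper, the 0.5 weighting, and the per-epoch
        # scheduler step are assumptions, not part of the original code.
        loss = chamfer_distance(y_coarse, coarse_gt) + \
            0.5 * chamfer_distance(y_detail, dense_gt)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        iter_count += 1
    scheduler.step()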
def train_autoencoder(train_matrix, test_set):
    num_users, num_items = train_matrix.shape
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        'Foursquare/place_correlation_gamma60.npz')
    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)

    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items,
                        args.inner_layers,
                        num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()

    # Element-wise MSE (no reduction); kept for parity with the training script.
    criterion = torch.nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    batch_size = args.batch_size
    user_indexes = np.arange(num_users)

    model.train()
    # Load the pre-trained model instead of training. This assumes `model.pkl`
    # stores the whole model object; if it stores a state_dict, use
    # `model.load_state_dict(torch.load('model.pkl'))` instead.
    model = torch.load('model.pkl')

    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]

        # Mask out items the user has already visited before ranking.
        pred_rating_vector[user_rating_vector > 0] = 0

        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score

        sorted_item = heapq.nlargest(topk, item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)

        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" %
              (user_id,
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:5], 5),
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:topk], topk)))

    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))

    print(precision)
    print(recall)
    print(MAP)
def train(args):
    print('Start')
    if torch.cuda.is_available():
        device = 'cuda'
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        device = 'cpu'

    train_epoch = args.train_epoch
    lr = args.lr
    beta1 = args.beta1
    beta2 = args.beta2
    batch_size = args.batch_size
    noise_var = args.noise_var
    h_dim = args.h_dim

    images_path = glob.glob(args.data_dir + '/face_images/*/*.png')
    random.shuffle(images_path)
    split_num = int(len(images_path) * 0.8)
    train_path = images_path[:split_num]
    test_path = images_path[split_num:]
    result_path = images_path[-15:]

    train_dataset = MyDataset(train_path)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    test_dataset = MyDataset(test_path)
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True)
    result_dataset = MyDataset(result_path)
    result_dataloader = torch.utils.data.DataLoader(result_dataset,
                                                    batch_size=len(result_dataset),
                                                    shuffle=False)
    result_images = next(iter(result_dataloader))

    model = AutoEncoder(h_dim=h_dim).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr, (beta1, beta2))

    out_path = args.model_dir
    train_loss_list = []
    test_loss_list = []

    for epoch in range(train_epoch):
        model.to(device)
        loss_train = 0
        for x in train_dataloader:
            # Denoising objective: reconstruct the clean image from a noisy input.
            noised_x = add_noise(x, noise_var)
            recon_x = model(noised_x)
            loss = criterion(recon_x, x)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        loss_train /= len(train_dataloader)
        train_loss_list.append(loss_train)

        if epoch % 1 == 0:
            with torch.no_grad():
                model.eval()
                loss_test = 0
                for x_test in test_dataloader:
                    recon_x_test = model(x_test)
                    loss_test += criterion(recon_x_test, x_test).item()
                loss_test /= len(test_dataloader)
                test_loss_list.append(loss_test)
            np.save(os.path.join(out_path, 'train_loss.npy'),
                    np.array(train_loss_list))
            np.save(os.path.join(out_path, 'test_loss.npy'),
                    np.array(test_loss_list))
            model.train()
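# `add_noise` is not defined in this snippet. A minimal sketch of what it
# likely does, assuming additive Gaussian noise scaled by `noise_var` with the
# result clamped to the valid pixel range; the exact noise model is an
# assumption.
import torch

def add_noise(x, noise_var):
    noise = torch.randn_like(x) * noise_var
    return torch.clamp(x + noise, 0., 1.)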
class Trainer(object):
    def __init__(self, train_loader, test_loader, config):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.config = config
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        self.num_epochs = config.num_epochs
        self.lr = config.lr

        self.in_channel = config.in_channel
        self.image_size = config.image_size
        self.hidden_dim = config.hidden_dim
        self.output_dim = config.output_dim

        self.log_interval = config.log_interval
        self.sample_interval = config.sample_interval
        self.ckpt_interval = config.ckpt_interval

        self.sample_folder = config.sample_folder
        self.ckpt_folder = config.ckpt_folder

        self.build_net()
        self.vis = Visualizer()

    def build_net(self):
        # define network
        self.net = AutoEncoder(self.in_channel, self.image_size,
                               self.hidden_dim, self.output_dim)

        if self.config.mode == 'test' and self.config.training_path == '':
            print("[*] Enter model path!")
            exit()

        # if a trained model exists, load its weights
        if self.config.training_path != '':
            self.net.load_state_dict(
                torch.load(self.config.training_path,
                           map_location=lambda storage, loc: storage))
            print("[*] Load weight from {}!".format(self.config.training_path))

        self.net.to(self.device)

    # add noise to image (randn_like keeps the noise on the same device as the input)
    def add_noise(self, imgs):
        noise = torch.randn_like(imgs) * 0.4
        noisy_imgs = noise + imgs
        return noisy_imgs

    def train(self):
        # define loss functions
        bce_criterion = nn.BCELoss().to(self.device)
        mse_criterion = nn.MSELoss().to(self.device)

        # define optimizer
        optimizer = Adam(self.net.parameters(), self.lr)

        step = 0
        print("[*] Learning started!")

        # get fixed sample
        temp_iter = iter(self.train_loader)
        fixed_imgs, _ = next(temp_iter)
        fixed_imgs = fixed_imgs.to(self.device)

        # save fixed sample image
        x_path = os.path.join(self.sample_folder, 'fixed_input.png')
        save_image(fixed_imgs, x_path, normalize=True)
        print("[*] Save fixed input image!")

        # make fixed noisy sample and save
        fixed_noisy_imgs = self.add_noise(fixed_imgs)
        noisy_x_path = os.path.join(self.sample_folder, 'fixed_noisy_input.png')
        save_image(fixed_noisy_imgs, noisy_x_path, normalize=True)
        print("[*] Save fixed noisy input image!")

        # flatten data tensors
        fixed_imgs = fixed_imgs.view(fixed_imgs.size(0), -1)
        fixed_noisy_imgs = fixed_noisy_imgs.view(fixed_imgs.size(0), -1)

        for epoch in range(self.num_epochs):
            for i, (imgs, _) in enumerate(self.train_loader):
                self.net.train()

                imgs = imgs.view(imgs.size(0), -1)  # original images
                imgs = imgs.to(self.device)  # keep targets on the same device as the outputs
                noisy_imgs = self.add_noise(imgs)  # add noise
                noisy_imgs = noisy_imgs.to(self.device)

                # forwarding
                outputs = self.net(noisy_imgs)  # use noisy image as input
                bce_loss = bce_criterion(outputs, imgs)
                mse_loss = mse_criterion(outputs, imgs)

                # backwarding
                optimizer.zero_grad()
                bce_loss.backward()  # backward BCE loss
                optimizer.step()

                # do logging
                if (step + 1) % self.log_interval == 0:
                    print("[{}/{}] [{}/{}] BCE loss: {:3f}, MSE loss:{:3f}".format(
                        epoch + 1, self.num_epochs, i + 1,
                        len(self.train_loader),
                        bce_loss.item() / len(imgs),
                        mse_loss.item() / len(imgs)))
                    self.vis.plot("BCE Loss plot", bce_loss.item() / len(imgs))
                    self.vis.plot("MSE Loss plot", mse_loss.item() / len(imgs))

                # do sampling
                if (step + 1) % self.sample_interval == 0:
                    outputs = self.net(fixed_noisy_imgs)
                    x_hat = outputs.cpu().data.view(outputs.size(0), -1,
                                                    self.image_size,
                                                    self.image_size)
                    x_hat_path = os.path.join(
                        self.sample_folder,
                        'output_epoch{}.png'.format(epoch + 1))
                    save_image(x_hat, x_hat_path, normalize=True)
                    print("[*] Save sample images!")

                step += 1

            if (epoch + 1) % self.ckpt_interval == 0:
                ckpt_path = os.path.join(self.ckpt_folder,
                                         'ckpt_epoch{}.pth'.format(epoch + 1))
                torch.save(self.net.state_dict(), ckpt_path)
                print("[*] Checkpoint saved!")

        print("[*] Learning finished!")
        ckpt_path = os.path.join(self.ckpt_folder, 'final_model.pth')
        torch.save(self.net.state_dict(), ckpt_path)
        print("[*] Final weight saved!")
if cuda_available:
    auto.cuda()

# Define the optimizer and loss function
optimizer = torch.optim.Adam(auto.parameters(), lr=LR)

# Prepare the data
root_dir = "./celeba_select"
image_files = os.listdir(root_dir)
train_dataset = CelebaDataset(root_dir, image_files, (64, 64),
                              transforms.Compose([ToTensor()]))
train_loader = DataLoader(train_dataset, batch_size=32, num_workers=1,
                          shuffle=True)

for i in range(EPOCHES):
    # Shuffle the data (the DataLoader reshuffles every epoch)
    auto.train()
    train_loss = 0
    for batch_idx, data in enumerate(train_loader):
        data = Variable(data.type(torch.FloatTensor))
        if cuda_available:
            data = data.cuda()
        optimizer.zero_grad()

        # push whole batch of data through VAE.forward() to get recon_loss
        recon_batch, mu, logvar = auto(data)

        # calculate scalar loss
        loss = loss_function(recon_batch, data, mu, logvar)

        # calculate the gradient of the loss w.r.t. the graph leaves
        # i.e. input variables -- by the power of pytorch!
        loss.backward()
        train_loss += loss.item()
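        # The snippet ends before the parameter update. A minimal completion,
        # assuming a standard VAE training step; the epoch-level print is
        # illustrative, not part of the original code.
        optimizer.step()
    print('Epoch {}: average train loss {:.4f}'.format(
        i, train_loss / len(train_loader)))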
from model import AutoEncoder

if __name__ == '__main__':
    # Train
    if False:
        autoencoder = AutoEncoder(input_shape=(32, 32, 3), latent_dim=64)
        autoencoder.train(train_dir='celeba_data/train',
                          val_dir='celeba_data/val',
                          epochs=20)
    else:
        autoencoder = AutoEncoder(input_shape=(32, 32, 3), latent_dim=64)
        autoencoder.restore_weights()
        autoencoder.reconstruct_samples('test_data')
        autoencoder.generate_samples()
        autoencoder.compute_distance('test_data')
def train(self, config):
    """Training routine"""

    # Initialize datasets for both training and validation
    train_data = torchvision.datasets.ImageFolder(
        root=os.path.join(config.data_dir, "train"),
        transform=torchvision.transforms.ToTensor())
    valid_data = torchvision.datasets.ImageFolder(
        root=os.path.join(config.data_dir, "valid"),
        transform=torchvision.transforms.ToTensor())

    # Create data loaders for training and validation.
    tr_data_loader = torch.utils.data.DataLoader(
        dataset=train_data,
        batch_size=config.batch_size,
        num_workers=config.numWorker,
        shuffle=True)
    va_data_loader = torch.utils.data.DataLoader(
        dataset=valid_data,
        batch_size=config.batch_size,
        num_workers=config.numWorker,
        shuffle=False)

    # Create model instance.
    # model = Model()
    model = AutoEncoder()
    # Move model to gpu if cuda is available
    if torch.cuda.is_available():
        model = model.cuda()
    # Make sure that the model is set for training
    model.train()

    # Create loss objects
    data_loss = nn.MSELoss()

    # Create optimizer
    optimizer = optim.Adam(model.parameters(), lr=config.learn_rate)
    # No need to move the optimizer (as of PyTorch 1.0), it lies in the same
    # space as the model

    # Create summary writers
    tr_writer = SummaryWriter(
        log_dir=os.path.join(config.log_dir, "train"))
    va_writer = SummaryWriter(
        log_dir=os.path.join(config.log_dir, "valid"))

    # Create log directory and save directory if they do not exist
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    if not os.path.exists(config.save_dir):
        os.makedirs(config.save_dir)

    # Initialize training
    iter_idx = -1  # make counter start at zero
    best_va_acc = 0  # to check if best validation accuracy

    # Prepare checkpoint file and model file to save and load from
    checkpoint_file = os.path.join(config.save_dir, "checkpoint.pth")
    bestmodel_file = os.path.join(config.save_dir, "best_model.pth")

    # Check for existing training results. If they exist, and the configuration
    # is set to resume (`config.resume == True`), resume from previous training.
    # If not, delete the existing checkpoint.
    if os.path.exists(checkpoint_file):
        if config.resume:
            # Use `torch.load` to load the checkpoint file and then load the
            # things that are required to continue training. For the model and
            # the optimizer, use `load_state_dict`. It's actually a good idea
            # to code the saving part first and then code this part.
            print("Checkpoint found! Resuming")  # TODO proper logging

            # Read checkpoint file.
            # Fix gpu -> cpu bug
            compute_device = 'cuda' if torch.cuda.is_available() else 'cpu'
            load_res = torch.load(checkpoint_file,
                                  map_location=compute_device)

            # Resume iterations
            iter_idx = load_res["iter_idx"]
            # Resume best va result
            best_va_acc = load_res["best_va_acc"]
            # Resume model
            model.load_state_dict(load_res["model"])
            # Resume optimizer
            optimizer.load_state_dict(load_res["optimizer"])
            # Note that we do not resume the epoch, since we will never be able
            # to properly recover the shuffling, unless we remember the random
            # seed, for example. For simplicity, we will simply ignore this,
            # and run `config.num_epoch` epochs regardless of resuming.
        else:
            os.remove(checkpoint_file)

    # Training loop
    for epoch in range(config.num_epoch):
        # For each iteration
        prefix = "Training Epoch {:3d}: ".format(epoch)
        for data in tqdm(tr_data_loader, desc=prefix):
            # Counter
            iter_idx += 1

            # Split the data: x is the image, y is the label
            x, y = data
            # print(x)

            # Send data to GPU if we have one
            if torch.cuda.is_available():
                x = x.cuda()
                y = y.cuda()

            # Apply the model to obtain scores (forward pass)
            logits = model.forward(x)

            # Compute the loss
            loss = data_loss(logits, x.float())

            # Compute gradients
            loss.backward()

            # Update parameters
            optimizer.step()

            # Zero the parameter gradients in the optimizer
            optimizer.zero_grad()

            # Monitor results every report interval
            if iter_idx % config.rep_intv == 0:
                # Compute accuracy (no gradients required). We'll wrap this
                # part so that we prevent torch from computing gradients.
                with torch.no_grad():
                    pred = torch.argmax(logits, dim=1)
                    acc = torch.mean(
                        torch.eq(pred.view(x.size()), x).float()) * 100.0

                # Write loss and accuracy to tensorboard, using keywords
                # `loss` and `accuracy`.
                tr_writer.add_scalar("loss", loss, global_step=iter_idx)
                tr_writer.add_scalar("accuracy", acc, global_step=iter_idx)

                # Save checkpoint
                torch.save(
                    {
                        "iter_idx": iter_idx,
                        "best_va_acc": best_va_acc,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict(),
                        "loss": loss,
                        "epoch": epoch,
                        "acc": acc
                    }, checkpoint_file)

            # Validate results every validation interval
            if iter_idx % config.val_intv == 0:
                # Lists to contain all losses and accuracies for all the
                # validation batches
                va_loss = []
                va_acc = []

                # Set model for evaluation
                model = model.eval()
                for data in va_data_loader:
                    # Split the data
                    x, y = data

                    # Send data to GPU if we have one
                    if torch.cuda.is_available():
                        x = x.cuda()
                        y = y.cuda()

                    # Apply forward pass to compute the losses and accuracies
                    # for each of the validation batches
                    with torch.no_grad():
                        # Compute logits
                        logits = model.forward(x)
                        # Compute loss and store as numpy
                        loss = data_loss(logits, x.float())
                        va_loss += [loss.cpu().numpy()]
                        # Compute accuracy and store as numpy
                        pred = torch.argmax(logits, dim=1)
                        acc = torch.mean(
                            torch.eq(pred.view(x.size()), x).float()) * 100.0
                        va_acc += [acc.cpu().numpy()]

                # Set model back for training
                model = model.train()

                # Take averages
                va_loss = np.mean(va_loss)
                va_acc = np.mean(va_acc)

                # Write to tensorboard using `va_writer`
                va_writer.add_scalar("loss", va_loss, global_step=iter_idx)
                va_writer.add_scalar("accuracy", va_acc, global_step=iter_idx)

                # Check if best accuracy
                if va_acc > best_va_acc:
                    best_va_acc = va_acc
                    # Save best model using torch.save. Similar to the previous
                    # save but at the location defined by `bestmodel_file`.
                    torch.save(
                        {
                            "iter_idx": iter_idx,
                            "best_va_acc": best_va_acc,
                            "model": model.state_dict(),
                            "optimizer": optimizer.state_dict(),
                            "loss": loss,
                            "acc": acc
                        }, bestmodel_file)