def test_train(model, train_data):
    """Pytest smoke test: run one full optimization pass over *train_data*.

    Asserts that mu/logvar shapes agree and every batch loss is non-zero;
    relies on module-level `optim` and `loss_function`.
    """
    opt = optim.Adam(model.parameters(), lr=1e-3)
    print('testing')
    epoch = 'pytest'
    model.train()
    total = 0
    for step, (batch, _) in enumerate(train_data):
        opt.zero_grad()
        recon, mu, logvar = model(batch)
        assert mu.size() == logvar.size()
        loss = loss_function(recon, batch, mu, logvar)
        assert loss != 0
        loss.backward()
        total += loss.item()
        opt.step()
        if step % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * len(batch), len(train_data.dataset),
                100. * step / len(train_data),
                loss.item() / len(batch)))
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, total / len(train_data.dataset)))
def train(epoch):
    """Run one VAE training epoch, logging heap usage and loss.

    Side effects: updates the module-level `model` via `optimizer`, logs
    heap size every batch (guppy/heapy `hpy`), and saves a grid of 64
    decoded samples every `args.log_interval` batches.
    """
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        h = hpy()
        logging.info('Memory consumption in bytes: {}'.format(h.heap().size))
        data = data.to(device)
        optimizer.zero_grad()  # prevent gradient from accumulating
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            # BUG FIX: the original printed the undefined name `loss_`
            # (NameError on the first log interval); report the
            # per-sample batch loss, matching the sibling trainers.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))
            with torch.no_grad():
                sample_ = torch.randn(64, 128).to(device)
                sample_ = model.decode(sample_).cpu()
                save_image(
                    sample_.view(64, 3, 32, 32),
                    'intermediates/sample_' + timestring + str(batch_idx) +
                    '.png')
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
def train(epoch, train_loader, model, optimizer, task_name, data_set, dim_Zt,
          plot_r):
    """Train *model* for one epoch; return the mean per-sample loss.

    Relies on module-level `device`, `args`, `models`, `epoch_interval`
    and `generate_file_name`. Saves periodic results through *plot_r*.
    """
    model.train()
    train_loss = 0
    # BUG FIX: `bce` was computed per batch but never accumulated, and the
    # final report printed `train_loss` in BOTH the Loss and BCE slots.
    bce_sum = 0
    for batch_idx, data in enumerate(train_loader):
        data = data[0].to(device)
        optimizer.zero_grad()
        recon_batch, zt = model(data)  # invoke the forward function
        loss, bce = models.loss_function(
            recon_batch, data)  # from loss function to back prop the variables.
        loss.backward()
        this_loss = loss.item()
        train_loss += this_loss
        bce_sum += float(bce)  # float() handles both 0-d tensors and floats
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train:{} {} dim {} Epoch:{} [{}/{} ({:.0f}%)]'.format(
                task_name, data_set, dim_Zt, epoch,
                (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader)))
    train_loss /= len(train_loader.dataset)
    bce_avg = bce_sum / len(train_loader.dataset)
    print(
        'Train:{} {} dim {} Epoch:{} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t[BCE: {:.6f}]'
        .format(task_name, data_set, dim_Zt, epoch,
                (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), train_loss,
                bce_avg))
    if epoch % epoch_interval == 0:
        plot_r.save_results_4(
            model,
            generate_file_name(task_name, data_set, dim_Zt, epoch, train_loss),
            task_name + " epoch " + str(epoch))
    return train_loss
def train():
    """Train a VAE with SGD over `train_dataloader` and save its weights.

    Prints wall-clock time at start and end plus a running 100-batch
    average loss; the final state dict is written to ./vae.pth.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    vae = VAE(device)
    vae.to(device)
    opt = optim.SGD(vae.parameters(), lr=lr)
    print(time.asctime(time.localtime(time.time())))
    n = 0  # global step counter (kept for parity with the old visdom hook)
    for epoch in range(epoch_sum):
        running = 0
        for batch_idx, batch in enumerate(train_dataloader, 0):
            images = batch[0].to(device)
            opt.zero_grad()
            out, mu, logvar = vae(images)
            step_loss = loss_function(out, images, mu, logvar)
            running += step_loss.item()
            step_loss.backward()
            opt.step()
            n += 1
            # Report the mean over the last 100 batches, then reset.
            if batch_idx % 100 == 99:
                print("epoch:%d, idx:%d, loss:%.6f" %
                      (epoch + 1, batch_idx, running / 100))
                running = 0
    torch.save(vae.state_dict(), './vae.pth')
    print(time.asctime(time.localtime(time.time())))
def test_step(batch):
    """Compute, record, and return the mean per-example loss for one batch.

    IMPROVEMENT: the original wrapped this in a `tf.GradientTape`, but an
    evaluation step never calls `tape.gradient` -- recording operations on
    the tape was pure overhead, so the tape is removed.
    """
    # NOTE(review): `training=True` in an eval step looks suspicious (it
    # enables dropout etc.); preserved to keep behavior identical -- confirm
    # whether `training=False` was intended.
    pred = albert(batch, training=True)
    loss = loss_function(batch, pred)
    batch_loss = loss / int(batch.shape[0])
    test_loss(batch_loss)
    return batch_loss
def train_step(batch):
    """Apply one optimizer update on *batch*; record and return mean loss."""
    with tf.GradientTape() as tape:
        predictions = albert(batch, training=True)
        total = loss_function(batch, predictions)
    per_example = total / int(batch.shape[0])
    params = albert.trainable_variables
    grads = tape.gradient(total, params)
    optimizer.apply_gradients(zip(grads, params))
    train_loss(per_example)
    return per_example
def train_step(nnue, sample, optimizer, lambda_, epoch, idx, num_batches):
    """Run a single optimization step for the NNUE network on *sample*.

    Returns the (still-attached) loss tensor.
    """
    us, them, white, black, outcome, score = sample
    output = nnue(us, them, white, black)
    loss = M.loss_function(lambda_, output, sample)
    # The trailing carriage return rewrites the progress line in place.
    print(
        f'Epoch {epoch}, {int(((idx+1)/num_batches)*100.0)}% ({idx+1}/{num_batches}) => {loss.item():.5f}',
        end='\r')
    loss.backward()
    optimizer.step()
    # Gradients are cleared AFTER the step so the next call starts fresh.
    nnue.zero_grad()
    return loss
def calculate_validation_loss(nnue, val_data_loader, lambda_):
    """Return the mean validation loss of *nnue* over *val_data_loader*.

    Puts the network in eval mode for the pass and restores train mode
    before returning. Returns a 0-d float tensor.
    """
    nnue.eval()
    with torch.no_grad():
        losses = []
        for sample in val_data_loader:
            us, them, white, black, outcome, score = sample
            pred = nnue(us, them, white, black)
            # IMPROVEMENT: accumulate Python floats via .item();
            # torch.tensor() over a list of tensors (as before) copies each
            # element and raises a warning on modern PyTorch.
            losses.append(M.loss_function(lambda_, pred, sample).item())
        val_loss = torch.mean(torch.tensor(losses))
    nnue.train()
    return val_loss
def train_step(batch, loss_object):
    """Accumulate masked-sequence losses over positions, then apply one
    optimizer update; records and returns the mean per-example loss."""
    total_loss = 0
    with tf.GradientTape() as tape:
        # Walk the sequence: mask at each position t and score the
        # prediction against the unmasked target.
        for t in range(1, batch.shape[0]):
            masked_inp, target = mask_sequences(batch, t=t)
            prediction = birnn(masked_inp, predict=True)
            total_loss += loss_function(target, prediction, loss_object)
    mean_loss = total_loss / int(batch.shape[0])
    params = birnn.trainable_variables
    grads = tape.gradient(total_loss, params)
    optimizer.apply_gradients(zip(grads, params))
    train_loss(mean_loss)
    return mean_loss
def train_step(inp, tar):
    """One teacher-forced transformer training step; records mean loss."""
    # Shift the target: the decoder consumes tar[:-1] and is scored
    # against tar[1:].
    tar_inp, tar_real = tar[:, :-1], tar[:, 1:]
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        inp, tar_inp)
    with tf.GradientTape() as tape:
        outputs, _ = transformer(inp, tar_inp, True, enc_padding_mask,
                                 combined_mask, dec_padding_mask)
        step_loss = loss_function(tar_real, outputs, loss_object)
    grads = tape.gradient(step_loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(grads, transformer.trainable_variables))
    train_loss(step_loss)
def test(epoch, test_loader, model, task_name, data_set, plot_r):
    """Evaluate *model* over *test_loader*; print and return mean test loss.

    BUG FIX: the original computed the averaged test loss and then silently
    discarded it (no print, no return); it is now reported and returned,
    consistent with the sibling test() that logs its result.
    Relies on module-level `device` and `models`.
    """
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            data = data[0].to(device)
            recon_batch, _ = model(data)
            loss, mse = models.loss_function(recon_batch, data)
            test_loss += loss.item()
    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.6f}'.format(test_loss))
    return test_loss
def train_step(inp, targ, enc_hidden):
    """One teacher-forced encoder/decoder step; returns mean per-step loss."""
    step_loss = 0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)
        dec_hidden = enc_hidden
        # Decoding starts from a column of start-of-sequence tokens.
        dec_input = tf.fill([hparams.batch, 1], SOS_ID)
        for t in range(targ.shape[1]):
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden,
                                                 enc_output)
            # Score against target column t...
            step_loss += model.loss_function(
                tf.reshape(tf.slice(targ, [0, t], [-1, 1]), [-1]),
                predictions)
            # ...then teacher-force: feed the true token as the next input.
            dec_input = tf.slice(targ, [0, t], [-1, 1])
    batch_loss = step_loss / int(targ.shape[1])
    trainables = encoder.trainable_variables + decoder.trainable_variables
    grads = tape.gradient(step_loss, trainables)
    optimizer.apply_gradients(zip(grads, trainables))
    return batch_loss
def run():
    """Restore the latest SSD checkpoint and run inference on an image.

    Rebuilds the inference graph (VGG front end + SSD heads + loss/decode
    ops), restores weights from the newest file under 'checkpoints', and
    hands the session plus the relevant tensors to `run_image`.
    """
    with tf.Session() as sess:
        # refactor to share with train
        # Graph construction: VGG backbone, then the SSD prediction layers.
        input_images, conv4_3_pool, conv4_3_relu, keep_prob = load_vgg(sess, VGG_PATH)
        confidences_all, locations_all = ssd_layers(conv4_3_pool, conv4_3_relu)
        # loss_function also builds the decode/probability tensors; only
        # some of the six returned ops are used below.
        loss, probabilities, probability_confidences , \
        true_locations, true_confidences, confidence_loss_mask = loss_function(confidences_all, locations_all)
        sess.run(tf.global_variables_initializer())
        # Restore trained weights over the freshly initialized variables.
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
        print("Model restored")
        run_image(sess, input_images, locations_all, probability_confidences,
                  probabilities)
def train_step(inp, tar):
    """One transformer training step; records loss and accuracy metrics."""
    # Teacher forcing: the decoder input is the target shifted right by one.
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    enc_padding_mask, combined_mask, dec_padding_mask = create_mask(
        inp, tar_inp)
    with tf.GradientTape() as tape:
        # NOTE: `transfomer` (sic) is the module-level model's actual name.
        outputs, _ = transfomer(inp, tar_inp, True, enc_padding_mask,
                                combined_mask, dec_padding_mask)
        step_loss = loss_function(tar_real, outputs,
                                  n_classes=target_vocab_size, rate=0.1)
    grads = tape.gradient(step_loss, transfomer.trainable_variables)
    optimizer.apply_gradients(zip(grads, transfomer.trainable_variables))
    train_loss(step_loss)
    train_accuracy(tar_real, outputs)
def test(epoch, test_loader, model, task_name, data_set, dim_Zt, plot_r):
    """Evaluate *model* over *test_loader*; print and return mean test loss.

    Relies on module-level `device` and `models`. Returned for symmetry
    with train(), which returns its averaged loss.
    """
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            data = data[0].to(device)
            recon_batch, zt = model(data)
            loss, bce = models.loss_function(recon_batch, data)
            test_loss += loss.item()
    test_loss /= len(test_loader.dataset)
    # BUG FIX: the original format string carried a stray ']'
    # ('... {:.6f}]'), producing an unbalanced bracket in the output.
    print('====> Test set loss: {:.6f}'.format(test_loss))
    return test_loss
def test(epoch):
    """Evaluate the VAE on `test_loader` and save one reconstruction grid.

    Prints the average per-sample test loss; saves a side-by-side
    original/reconstruction image for the first batch of each epoch.
    """
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            recon_batch, mu, logvar = model(data)
            test_loss += loss_function(recon_batch, data, mu, logvar).item()
            if i == 0:
                n = min(data.size(0), 8)
                # BUG FIX: view(args.batch_size, ...) crashes whenever the
                # first batch holds fewer than args.batch_size samples
                # (small dataset / drop_last=False); let view infer it.
                comparison = torch.cat([
                    data[:n],
                    recon_batch.view(-1, 3, 32, 32)[:n]
                ])
                save_image(comparison.cpu(),
                           'results_cifar/reconstruction_' + timestring +
                           str(epoch) + '.png',
                           nrow=n)
    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))
def train():
    """Full nnabla CIFAR-10 training driver.

    Builds the data pipeline (optionally with shuffled labels), constructs
    separate train/validation graphs for a ResNet-23/56 classifier, then
    runs the epoch loop: validate, checkpoint, train, monitor. Saves final
    parameters and an .nnp bundle. All configuration comes from the
    module-level `args`.
    """
    bs_train, bs_valid = args.train_batch_size, args.val_batch_size
    extension_module = args.context
    ctx = get_extension_context(
        extension_module, device_id=args.device_id, type_config=args.type_config
    )
    nn.set_default_context(ctx)
    # Either load pre-built loaders from --input, or build CIFAR-10 sources.
    if args.input:
        train_loader, val_loader, n_train_samples, n_val_samples = load_data(
            bs_train, bs_valid
        )
    else:
        train_data_source = data_source_cifar10(
            train=True, shuffle=True, label_shuffle=True
        )
        val_data_source = data_source_cifar10(train=False, shuffle=False)
        n_train_samples = len(train_data_source.labels)
        n_val_samples = len(val_data_source.labels)
        # Data Iterator
        train_loader = data_iterator(
            train_data_source, bs_train, None, False, False)
        val_loader = data_iterator(
            val_data_source, bs_valid, None, False, False)
        # Persist the (shuffled) label arrays so runs are reproducible.
        if args.shuffle_label:
            if not os.path.exists(args.output):
                os.makedirs(args.output)
            np.save(os.path.join(args.output, "x_train.npy"),
                    train_data_source.images)
            np.save(
                os.path.join(args.output, "y_shuffle_train.npy"),
                train_data_source.labels,
            )
            np.save(os.path.join(args.output, "y_train.npy"),
                    train_data_source.raw_label)
            np.save(os.path.join(args.output, "x_val.npy"),
                    val_data_source.images)
            np.save(os.path.join(args.output, "y_val.npy"),
                    val_data_source.labels)
    if args.model == "resnet23":
        model_prediction = resnet23_prediction
    elif args.model == "resnet56":
        model_prediction = resnet56_prediction
    prediction = functools.partial(
        model_prediction, ncls=10, nmaps=64, act=F.relu, seed=args.seed)
    # Create training graphs
    test = False
    image_train = nn.Variable((bs_train, 3, 32, 32))
    label_train = nn.Variable((bs_train, 1))
    pred_train, _ = prediction(image_train, test)
    loss_train = loss_function(pred_train, label_train)
    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    label_valid = nn.Variable((bs_valid, 1))
    pred_valid, _ = prediction(image_valid, test)
    loss_val = loss_function(pred_valid, label_valid)
    for param in nn.get_parameters().values():
        param.grad.zero()
    cfg = read_yaml("./learning_rate.yaml")
    print(cfg)
    lr_sched = create_learning_rate_scheduler(cfg.learning_rate_config)
    solver = S.Momentum(momentum=0.9, lr=lr_sched.get_lr())
    solver.set_parameters(nn.get_parameters())
    start_point = 0
    if args.checkpoint is not None:
        # load weights and solver state info from specified checkpoint file.
        start_point = load_checkpoint(args.checkpoint, solver)
    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=1)
    monitor_err = MonitorSeries("Training error", monitor, interval=1)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=1)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)
    monitor_vloss = MonitorSeries("Test loss", monitor, interval=1)
    # save_nnp
    contents = save_nnp({"x": image_valid}, {"y": pred_valid}, bs_valid)
    save.save(
        os.path.join(args.model_save_path, (args.model+"_epoch0_result.nnp")),
        contents
    )
    train_iter = math.ceil(n_train_samples / bs_train)
    val_iter = math.ceil(n_val_samples / bs_valid)
    # Training-loop
    for i in range(start_point, args.train_epochs):
        lr_sched.set_epoch(i)
        solver.set_learning_rate(lr_sched.get_lr())
        print("Learning Rate: ", lr_sched.get_lr())
        # Validation
        ve = 0.0
        vloss = 0.0
        print("## Validation")
        for j in range(val_iter):
            image, label = val_loader.next()
            image_valid.d = image
            label_valid.d = label
            loss_val.forward()
            vloss += loss_val.data.data.copy() * bs_valid
            ve += categorical_error(pred_valid.d, label)
        # NOTE(review): divides by args.val_iter, but the loop above runs
        # the locally computed val_iter -- if they differ, the reported
        # validation error is mis-scaled. Confirm which is intended.
        ve /= args.val_iter
        vloss /= n_val_samples
        monitor_verr.add(i, ve)
        monitor_vloss.add(i, vloss)
        if int(i % args.model_save_interval) == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)
        # Forward/Zerograd/Backward
        print("## Training")
        e = 0.0
        loss = 0.0
        for k in range(train_iter):
            image, label = train_loader.next()
            image_train.d = image
            label_train.d = label
            loss_train.forward()
            solver.zero_grad()
            loss_train.backward()
            solver.update()
            e += categorical_error(pred_train.d, label_train.d)
            loss += loss_train.data.data.copy() * bs_train
        e /= train_iter
        loss /= n_train_samples
        # NOTE(review): the epoch-averaged `e` computed just above is
        # immediately overwritten with the LAST batch's error, so the
        # monitored "Training error" is last-batch only. Confirm intent.
        e = categorical_error(pred_train.d, label_train.d)
        monitor_loss.add(i, loss)
        monitor_err.add(i, e)
        monitor_time.add(i)
    # NOTE(review): the filename is always rendered from args.train_epochs,
    # not the last epoch index actually reached -- verify this is desired.
    nn.save_parameters(
        os.path.join(args.model_save_path,
                     "params_%06d.h5" % (args.train_epochs))
    )
    # save_nnp_lastepoch
    contents = save_nnp({"x": image_valid}, {"y": pred_valid}, bs_valid)
    save.save(os.path.join(args.model_save_path, (args.model+"_result.nnp")),
              contents)
# Fragment of a VAE training loop: the enclosing epoch/batch `for` loops and
# the definitions of `overall_train_data`, `model_vae`, `optimizer`, `device`,
# `batch_size`, `i`, `epoch`, `num_epochs` and the history lists live outside
# this view. Processes one mini-batch end to end.
train_images = overall_train_data[i:i + batch_size].to(device)
train_images = train_images.requires_grad_()
# Clear gradients w.r.t. parameters
optimizer.zero_grad()
# Forward pass to get output/logits
recon_train_data, mu, var = model_vae(train_images)
# Reshape flat reconstructions to 28x28 images and stash a NumPy copy
# (detached, on CPU) for later visualization.
# NOTE(review): assumes 28*28 outputs (MNIST-like) -- confirm upstream.
recon_train_data1 = recon_train_data.reshape(len(recon_train_data), 28, 28)
recon_train_data2 = recon_train_data1.cpu().detach().numpy()
train_reconstruction_images.append(recon_train_data2)
# Calculate Loss and save regularization term to see change
train_loss, train_regularization_term = loss_function(
    recon_train_data1, train_images, mu, var)
# Getting gradients w.r.t. parameters
train_loss.backward()
# Updating parameters
optimizer.step()
# Save losses during training for each epoch to see change of loss
train_loss_history.append(train_loss.item())
train_regularization_terms.append(train_regularization_term.item())
# Print loss and accuracy for each epoch
print('Epoch:{}/{}, Train_Loss:{}, Train_Reg_Term:{}'.format(
    epoch, num_epochs, train_loss.item(), train_regularization_term.item()))