Example #1
    fake_to_show = G(save_noise).detach()
    vutils.save_image(fake_to_show, fixed_p, normalize=True)

    num_info = {
        'Discriminator loss': torch.mean(torch.FloatTensor(D_losses)),
        'Generator loss': torch.mean(torch.FloatTensor(G_losses))
    }

    #tensorboard logging
    writer.add_scalars('Loss', num_info, epoch)
    writer.add_image('Fake Samples', fake_to_show[0].cpu(), epoch)
    train_hist['per_epoch_ptimes'].append(per_epoch_ptime)
    if epoch % 30 == 0:
        fid_score = fid_model.compute_fid(real_image, G_result)
        print("FID score", fid_score)
        writer.add_scalar('FID Score', fid_score, epoch)

end_time = time.time()
total_ptime = end_time - start_time
train_hist['total_ptime'].append(total_ptime)
print("Avg one epoch ptime: %.2f, total %d epochs ptime: %.2f" %
      (torch.mean(torch.FloatTensor(
          train_hist['per_epoch_ptimes'])), train_epoch, total_ptime))
writer.close()

with open(report_dir + 'train_hist.pkl', 'wb') as f:
    pickle.dump(train_hist, f)

show_train_hist(train_hist, save=True, path=report_dir + 'train_hist.png')

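The snippet above assumes that a TensorBoard writer, a fixed noise batch, and the per-epoch loss lists were created earlier in the script. A minimal sketch of that logging-related setup, with the latent size of 100 and the writer's log directory as assumptions:

import torch
from torch.utils.tensorboard import SummaryWriter

report_dir = 'report/'                                   # same directory the snippet pickles train_hist into
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
writer = SummaryWriter(log_dir=report_dir + 'tensorboard/')
save_noise = torch.randn(64, 100, 1, 1, device=device)   # fixed latent batch reused every epoch (latent size assumed)
D_losses, G_losses = [], []                              # reset at the start of every epoch
train_hist = {'per_epoch_ptimes': [], 'total_ptime': []}
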
Example #2
def train(epochs, batch_size, lr, loss_fn, data_dir):
    
    param_cuda = torch.cuda.is_available()
    
    #check training starting time
    start_time = time.time()
    
    G, D, G_opt, D_opt = initialize(mean=0.0, std=0.02, lr=lr)
    
    
    #train_hist dict will store the losses of every epoch
    train_hist = {}
    train_hist['D_model_mean_losses'] = []
    train_hist['G_model_mean_losses'] = []
    train_hist['per_epoch_ptimes'] = []
    train_hist['total_ptime'] = []

    #folder for saving the images
    if not os.path.isdir('GAN_results'):
        os.mkdir('GAN_results')

    for epoch in range(epochs):

        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, epochs))
        epoch_start_time = time.time()

        #One epoch of training over the whole dataset
        train_hist = epoch_train(G=G, D=D, G_opt=G_opt, D_opt=D_opt,
                                 batch_size=batch_size, lr=lr, loss_fn=loss_fn,
                                 data_dir=data_dir, train_hist=train_hist)

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time

        #print progress information for every epoch:
        print("iteration number "+str(epoch))
        print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' % ((epoch + 1), epochs, per_epoch_ptime, torch.mean(torch.FloatTensor(train_hist['D_model_mean_losses'])), torch.mean(torch.FloatTensor(train_hist['G_model_mean_losses']))))

        #Save weights
        utils.save_checkpoint({'epoch': epoch + 1,
                               'D_model_state_dict': D.state_dict(),
                               'G_model_state_dict': G.state_dict(),
                               'D_optim_dict': D_opt.state_dict(),
                               'G_optim_dict': G_opt.state_dict()},
                               is_best=False,
                               checkpoint =  'GAN_results/')

        #Generate and save pictures for every epoch:
        p = 'GAN_results/result_epoch_' + str(epoch + 1) + '.png'
        utils.show_result(param_cuda, G, (epoch+1), p, save=True)

        #add epoch time to the training history
        train_hist['per_epoch_ptimes'].append(per_epoch_ptime)

    end_time = time.time()
    total_ptime = end_time - start_time
    train_hist['total_ptime'].append(total_ptime)

    print("Avg per epoch ptime: %.2f, total %d epochs ptime: %.2f" % (torch.mean(torch.FloatTensor(train_hist['per_epoch_ptimes'])), epochs, total_ptime))
    print("Training finish!... save learned parameters")

    #plot training history
    utils.show_train_hist(train_hist, save=True, path= 'GAN_results/_train_hist.png')
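
A hedged usage sketch for train(); the hyper-parameters and data_dir below are placeholders, and nn.BCELoss is only an assumption about which loss_fn this GAN expects:

import torch.nn as nn

# hypothetical invocation; adjust the values to the actual dataset and hardware
train(epochs=100,
      batch_size=128,
      lr=0.0002,
      loss_fn=nn.BCELoss(),
      data_dir='data/')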

Example #3
    train_hist['D_losses'].append(np.mean(D_losses))
    train_hist['G_losses'].append(np.mean(G_losses))
    train_hist['per_epoch_ptimes'].append(per_epoch_ptime)

    print('Saving Model Epoch: ', epoch + 1)
    saver.save(sess, './model', write_meta_graph=False)
    print('Model Saved\n')

end_time = time.time()
total_ptime = end_time - start_time
train_hist['total_ptime'].append(total_ptime)

print('Avg per epoch ptime: %.2f, total %d epochs ptime: %.2f' % (np.mean(train_hist['per_epoch_ptimes']), train_epoch, total_ptime))
print("Training finish!... save training results")
with open(root + model + 'train_hist.pkl', 'wb') as f:
    pickle.dump(train_hist, f)

for i in range(10):
    images = []
    for e in range(train_epoch):
        img_name = root + 'Results/' + model + str(e + 1) + str(i) + '.png'
        images.append(imageio.imread(img_name))
    imageio.mimsave(root + model + 'generation_animation' + str(i) + '.gif', images, fps=5)

utils.show_train_hist(train_hist, save=True, path=root + model + 'train_hist.png')
sess.close()

Example #4
def train_process(config):
  
  root_dir=config.root_dir
  input_dir=os.path.join(root_dir, "data/")
  version=config.version
  root_dir=make_folder(root_dir, version)
  model_dir=make_folder(root_dir, "model/")
  report_dir=make_folder(root_dir, "report/")
  output_dir=make_folder(root_dir, "output/epoch/")
  res_dir=make_folder(root_dir, "res/")
  inp_width, inp_height, inp_channels=config.inp_width, config.inp_height, config.inp_channels
  train_split=config.train_split
  # model parameters
  lrG=config.lrG
  lrD=config.lrD
  beta1=config.beta1
  beta2=config.beta2
  L1_lambda=config.L1_lambda
  ngf=config.ngf
  ndf=config.ndf
  # batch_size and train_epoch are used below but were never read in the original
  # snippet; here they are assumed to be config fields as well
  batch_size=config.batch_size
  train_epoch=config.train_epoch

  dataset=localImageDataset(root_dir, inp_width, inp_height, inp_channels)
  print("Length of dataset: ",len(dataset))
  train_size=int(train_split*len(dataset))
  val_size=len(dataset)-train_size
  train_dataset, val_dataset=torch.utils.data.random_split(dataset,[train_size,val_size])
  train_dataloader=torch.utils.data.DataLoader(
      dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
  num_batches=len(train_dataloader)
  val_dataloader=torch.utils.data.DataLoader(
      dataset=val_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
  
  #from model import generator, discriminator
  #import utils



  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  G = generator(ngf)
  D = discriminator(ndf)
  BCE_loss=nn.BCELoss().cuda()
  L1_loss=nn.L1Loss().cuda()
  G_optimizer=optim.Adam(G.parameters(),lr=lrG,betas=(beta1,beta2))
  D_optimizer=optim.Adam(D.parameters(),lr=lrD,betas=(beta1,beta2))
  start_time=time.time()
  epoch_start=0
  epoch_end=epoch_start+train_epoch

  #loss


  if(os.path.isfile(model_dir+'generator_param.pkl') and os.path.isfile(model_dir+'discriminator_param.pkl')):
    
    G_checkpoint=torch.load(model_dir+'generator_param.pkl',map_location=device)
    D_checkpoint=torch.load(model_dir+'discriminator_param.pkl',map_location=device)
    G.load_state_dict(G_checkpoint['model_state_dict'])
    D.load_state_dict(D_checkpoint['model_state_dict'])
    G.to(device)
    D.to(device)
    G.train()
    D.train()

    G_optimizer.load_state_dict(G_checkpoint['optimizer_state_dict'])
    D_optimizer.load_state_dict(D_checkpoint['optimizer_state_dict'])
    
    train_hist=G_checkpoint['train_hist']
    epoch_start=G_checkpoint['epoch']
    epoch_end=epoch_start+train_epoch
  else:
    G.weight_init(mean=0.0, std=0.02)
    D.weight_init(mean=0.0, std=0.02)
    G.to(device)
    D.to(device)
    G.train()
    D.train()
    
    G_optimizer=optim.Adam(G.parameters(),lr=lrG,betas=(beta1,beta2))
    D_optimizer=optim.Adam(D.parameters(),lr=lrD,betas=(beta1,beta2))

    train_hist={}
    train_hist['D_losses']=[]
    train_hist['G_losses']=[]
    train_hist['per_epoch_ptimes']=[]
    train_hist['total_ptime']=[]
    epoch_end=epoch_start+train_epoch



  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  G = generator(ngf)
  D = discriminator(ndf)
  G_optimizer=optim.Adam(G.parameters(),lr=lrG,betas=(beta1,beta2))
  D_optimizer=optim.Adam(D.parameters(),lr=lrD,betas=(beta1,beta2))
  #loss
  BCE_loss=nn.BCELoss().to(device)
  L1_loss=nn.L1Loss().to(device)

  if(os.path.isfile(model_dir+'generator_param.pkl') and os.path.isfile(model_dir+'discriminator_param.pkl')):
    
    G_checkpoint=torch.load(model_dir+'generator_param.pkl',map_location=device)
    D_checkpoint=torch.load(model_dir+'discriminator_param.pkl',map_location=device)
    G.load_state_dict(G_checkpoint['model_state_dict'])
    D.load_state_dict(D_checkpoint['model_state_dict'])
    G.to(device)
    D.to(device)
    G.train()
    D.train()
    #D.eval()

    G_optimizer.load_state_dict(G_checkpoint['optimizer_state_dict'])
    D_optimizer.load_state_dict(D_checkpoint['optimizer_state_dict'])
    
    train_hist=G_checkpoint['train_hist']
    epoch_start=G_checkpoint['epoch']
    epoch_end=epoch_start+train_epoch
  else:
    print("Previous model not found. Restarting train process...")
    G.weight_init(mean=0.0, std=0.02)
    D.weight_init(mean=0.0, std=0.02)
    G.to(device)
    D.to(device)
    G.train()
    D.train()
    
    
    G_optimizer=optim.Adam(G.parameters(),lr=lrG,betas=(beta1,beta2))
    D_optimizer=optim.Adam(D.parameters(),lr=lrD,betas=(beta1,beta2))

    train_hist={}
    train_hist['D_losses']=[]
    train_hist['G_losses']=[]
    train_hist['per_epoch_ptimes']=[]
    train_hist['total_ptime']=[]
    epoch_start=0
    epoch_end=epoch_start+train_epoch


  for epoch in range(epoch_start,epoch_end):
    D_losses=[]
    G_losses=[]
    epoch_start_time=time.time()
    num_iter=0
    for text_image, inp_image in train_dataloader:
      inp_image,text_image=Variable(inp_image.to(device)),Variable(text_image.to(device))
      D.zero_grad()

      
      D_result=D(inp_image,text_image).squeeze()
      D_real_loss=BCE_loss(D_result,Variable(torch.ones(D_result.size()).to(device)))
      
      G_result=G(inp_image)
      D_result=D(inp_image,G_result).squeeze()
      D_fake_loss=BCE_loss(D_result,Variable(torch.zeros(D_result.size()).to(device)))
      
      D_train_loss=(D_real_loss +D_fake_loss)*0.5
      D_train_loss.backward()
      D_optimizer.step()
      train_hist['D_losses'].append(float(D_train_loss))

      D_losses.append(float(D_train_loss))
      
      #training generator
      G.zero_grad()

      G_result=G(inp_image)
      # use the same (input, candidate) argument order as in the discriminator updates above
      D_result=D(inp_image,G_result).squeeze()

      G_train_loss=BCE_loss(D_result, Variable(torch.ones(D_result.size()).to(device))) + L1_lambda*L1_loss(G_result,text_image)
      G_train_loss.backward()
      G_optimizer.step()

      train_hist['G_losses'].append(float(G_train_loss))
      G_losses.append(float(G_train_loss))
      num_iter+=1

    torch.save({
              'epoch': epoch,
              'model_state_dict': G.state_dict(),
              'optimizer_state_dict': G_optimizer.state_dict(),
              'train_hist': train_hist
              }, model_dir+'generator_param.pkl')

    torch.save({
              'model_state_dict': D.state_dict(),
              'optimizer_state_dict': D_optimizer.state_dict(),
              },model_dir+'discriminator_param.pkl')

    epoch_end_time=time.time()
    per_epoch_ptime=epoch_end_time-epoch_start_time
    print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' % ((epoch + 1), train_epoch, per_epoch_ptime, torch.mean(torch.FloatTensor(D_losses)),
                                                                torch.mean(torch.FloatTensor(G_losses))))
    fixed_p = output_dir + str(epoch + 1) + '.png'
    #show_result(G, Variable(inp_image.to(device), volatile=True), text_image.cpu(), (epoch+1), save=True, path=fixed_p)
    train_hist['per_epoch_ptimes'].append(per_epoch_ptime)
    
  end_time=time.time()
  total_ptime=end_time-start_time
  train_hist['total_ptime'].append(total_ptime)
  print("Avg one epoch ptime: %.2f, total %d epochs ptime: %.2f" % (torch.mean(torch.FloatTensor(train_hist['per_epoch_ptimes'])), train_epoch, total_ptime))
    

  with open(report_dir+'train_hist.pkl', 'wb') as f:
      pickle.dump(train_hist, f)

  show_train_hist(train_hist, save=True, path=report_dir + 'train_hist.png')
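
train_process() reads everything from a single config object. A minimal sketch of the fields it touches (the concrete values are placeholders, and batch_size / train_epoch are the fields assumed in the parameter block above):

from types import SimpleNamespace

# hypothetical config covering the fields train_process() reads; all values are placeholders
config = SimpleNamespace(
    root_dir='./', version='v1/',
    inp_width=256, inp_height=256, inp_channels=3,
    train_split=0.9,          # fraction of the dataset used for training
    lrG=0.0002, lrD=0.0002,   # Adam learning rates for G and D
    beta1=0.5, beta2=0.999,   # Adam betas
    L1_lambda=100,            # weight of the L1 term in the generator loss
    ngf=64, ndf=64,           # base channel widths of generator / discriminator
    batch_size=4,             # assumed config field (see the parameter block above)
    train_epoch=200,          # assumed config field
)
train_process(config)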

  
Example #5
def train_and_evaluate(param_cuda,
                       dataset,
                       G_model,
                       D_model,
                       G_optimizer,
                       D_optimizer,
                       loss_fn,
                       train_loader,
                       train_epoch,
                       model_dir,
                       restore_file=None):
    '''Train the model and evaluate every epoch'''

    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(model_dir, restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, D_model, G_model, D_optimizer,
                              G_optimizer)

    # check training starting time
    start_time = time.time()

    # here we are going to save the losses of every epoch
    train_hist = {}
    train_hist['D_model_mean_losses'] = []
    train_hist['G_model_mean_losses'] = []
    train_hist['per_epoch_ptimes'] = []
    train_hist['total_ptime'] = []

    # folder for saving the images
    if not os.path.isdir(dataset + '_results'):
        os.mkdir(dataset + '_results')

    for epoch in range(train_epoch):

        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, train_epoch))
        epoch_start_time = time.time()

        # compute number of batches in one epoch (one full pass over the training set)
        train_hist = train(G_model, D_model, G_optimizer, D_optimizer, loss_fn,
                           train_loader, param_cuda, train_hist)

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time

        #prints in every epoch:
        print("iteration number " + str(epoch))
        print(
            '[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' %
            ((epoch + 1), train_epoch, per_epoch_ptime,
             torch.mean(torch.FloatTensor(train_hist['D_model_mean_losses'])),
             torch.mean(torch.FloatTensor(train_hist['G_model_mean_losses']))))

        # Save weights
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'D_model_state_dict': D_model.state_dict(),
                'G_model_state_dict': G_model.state_dict(),
                'D_optim_dict': D_optimizer.state_dict(),
                'G_optim_dict': G_optimizer.state_dict()
            },
            is_best=False,
            checkpoint=dataset + '_results/')

        #save test pictures after every epoch:
        p = dataset + '_results/result_epoch_' + str(epoch + 1) + '.png'
        utils.show_result(param_cuda, G_model, (epoch + 1), p, save=True)

        # add epoch time to the training history
        train_hist['per_epoch_ptimes'].append(per_epoch_ptime)

    end_time = time.time()
    total_ptime = end_time - start_time
    train_hist['total_ptime'].append(total_ptime)

    print("Avg per epoch ptime: %.2f, total %d epochs ptime: %.2f" %
          (torch.mean(torch.FloatTensor(
              train_hist['per_epoch_ptimes'])), train_epoch, total_ptime))
    print("Training finish!... save learned parameters")

    # plot training history
    utils.show_train_hist(train_hist,
                          save=True,
                          path=dataset + '_results/_train_hist.png')
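
A hedged call sketch for this train_and_evaluate(); G, D and train_loader are placeholders that must come from the surrounding project's model and data-loading modules, and the dataset name and hyper-parameters are assumptions:

import torch
import torch.nn as nn

# hypothetical wiring; G, D and train_loader are assumed to be built elsewhere in the repo
param_cuda = torch.cuda.is_available()
loss_fn = nn.BCELoss()
G_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002, betas=(0.5, 0.999))
D_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002, betas=(0.5, 0.999))

train_and_evaluate(param_cuda, 'MNIST', G, D, G_optimizer, D_optimizer,
                   loss_fn, train_loader, train_epoch=20,
                   model_dir='experiments/base_model', restore_file=None)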
Example #6
def train_and_evaluate(model, train_dataloader, val_dataloader, optimizer, loss_fn, epochs,
                       restore_file=None):

    total_batch_loss = []
    val_losses = []
    start_epoch = 0
    best_val_loss = float('inf')
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(checkpoint_dir, restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        checkpoint = utils.load_checkpoint(restore_path, model, optimizer)
        start_epoch = checkpoint['epoch']
        best_val_loss = checkpoint.get('best_loss',float('inf'))



    for epoch in range(start_epoch, epochs + start_epoch):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, epochs + start_epoch))

        # compute number of batches in one epoch (one full pass over the training set)
        batch_loss, best_temp = train(model, optimizer, loss_fn, train_dataloader, val_dataloader, epoch, best_val_loss)
        best_val_loss = best_temp
        total_batch_loss += batch_loss


        # Evaluate MSE for one epoch on train and validation set
        train_MSE = evaluate(model, nn.MSELoss(), train_dataloader, device, dtype)
        val_MSE = evaluate(model, nn.MSELoss(), val_dataloader, device, dtype)
        # Evaluate L1 for one epoch on train and validation set
        train_L1 = evaluate(model, nn.L1Loss(), train_dataloader, device, dtype)
        val_L1 = evaluate(model, nn.L1Loss(), val_dataloader, device, dtype)

        # save training history in csv file:
        utils.save_history(epoch, train_MSE, val_MSE, train_L1, val_L1, results_dir)

        # print losses
        logging.info("- Train average MSE loss: " + str(train_MSE))
        logging.info("- Validation average MSE loss: " + str(val_MSE))
        val_losses.append(val_MSE)

        is_best = val_MSE <= best_val_loss

        # Save weights
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model.state_dict(),
                               'optim_dict': optimizer.state_dict()},
                              is_best=is_best,
                              checkpoint=checkpoint_dir)

        # If best_eval, best_save_path
        if is_best:
            logging.info("- Found new best accuracy")
            best_val_loss = val_MSE

            # Save best val loss in a text file in the checkpoint directory
            best_val_path = "val_loss.txt"
            utils.save_dict_to_txt(val_MSE, results_dir, best_val_path, epoch)
            utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model.state_dict(),
                               'optim_dict': optimizer.state_dict(),
                               'best_loss' : val_MSE},
                               is_best=is_best,
                               checkpoint=checkpoint_dir)


        ## plots of losses
        # append this epoch's average training loss to the running history on disk
        epoch_avg_trainloss = np.mean(batch_loss)
        if epoch != 0 or restore_file is not None:
            epoch_train_losses = np.load(os.path.join(results_dir, "epoch_avg_trainloss.npy"))
            epoch_train_losses = np.append(epoch_train_losses, epoch_avg_trainloss)
        else:
            epoch_train_losses = np.array([epoch_avg_trainloss])

        np.save(os.path.join(results_dir, "epoch_avg_trainloss"), epoch_train_losses)
        np.save(os.path.join(results_dir, "epoch_val_loss"), val_losses)
    utils.show_train_hist(total_batch_loss, results_dir, show=False, epoch_plot=False, save=True)
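
Finally, a usage sketch for this regression-style train_and_evaluate(); checkpoint_dir, results_dir, device and dtype are module-level names the function relies on, and the toy model and random data below are placeholders:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# hypothetical setup; swap in the project's real model and dataloaders
model = nn.Sequential(nn.Linear(64, 128), nn.ReLU(), nn.Linear(128, 1))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

train_dataloader = DataLoader(TensorDataset(torch.randn(256, 64), torch.randn(256, 1)),
                              batch_size=32, shuffle=True)
val_dataloader = DataLoader(TensorDataset(torch.randn(64, 64), torch.randn(64, 1)),
                            batch_size=32)

train_and_evaluate(model, train_dataloader, val_dataloader, optimizer,
                   nn.MSELoss(), epochs=50, restore_file=None)  # restore_file='last' would resume from a checkpoint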