# Assumed module-level setup for this snippet: ActorNet, CriticNet and
# Discriminator are defined in separate modules of the original project.
import os
from statistics import mean

import gym
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

# Assumed global device handle used throughout the original file
Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class PGAgent():
    def __init__(self):
        self.env = gym.make('CartPole-v1')
        self.expert_traj_fpath = os.path.join('GAIL', 'expert_trajectories', 'expert_traj_test.npz')
        self.save_policy_fpath = os.path.join('GAIL', 'gail_actor1.pt')
        self.save_rewards_fig = os.path.join('GAIL', 'gail_rewards.png')

        # These values are taken from the env's state and action sizes
        self.state_space = 4
        self.action_space = 2

        self.actor_net = ActorNet(self.state_space, self.action_space)
        self.actor_net.to(device=Device)
        self.actor_optim = torch.optim.Adam(self.actor_net.parameters(), lr=0.0001)

        self.critic_net = CriticNet(self.state_space)
        self.critic_net.to(Device)
        self.critic_net_optim = torch.optim.Adam(self.critic_net.parameters(), lr=0.0001)

        self.discriminator = Discriminator(self.state_space, self.action_space)
        self.discriminator.to(Device)
        self.discriminator_optim = torch.optim.Adam(self.discriminator.parameters(), lr=0.0001)

        # Storing all the values used to calculate the model losses
        self.traj_obs = []
        self.traj_actions = []
        self.traj_rewards = []
        self.traj_dones = []
        self.traj_logprobs = []
        self.traj_logits = []
        self.traj_state_values = []

        # Discount factor
        self.gamma = 0.95
        # Bias-variance trade-off (a higher value gives higher variance, lower bias)
        self.gae_lambda = 0.95

        # These two are used while training the policy
        self.ppo_batch_size = 500
        self.ppo_epochs = 12
        self.ppo_eps = 0.2

        # Discriminator
        self.num_expert_transitions = 150

        # These are used while the agent plays with the current policy
        self.max_eps = 5
        # Max steps in the MountainCar example is 200
        self.max_steps = 800

        # Documenting the stats
        self.avg_over = 5  # episodes
        self.stats = {'episode': 0, 'ep_rew': []}

    def clear_lists(self):
        self.traj_obs = []
        self.traj_actions = []
        self.traj_rewards = []
        self.traj_dones = []
        self.traj_logprobs = []
        self.traj_logits = []
        self.traj_state_values = []

    # Returns a Categorical distribution object, which makes it easy to compute
    # log_prob and prob, and to sample from it
    def get_logits(self, state):
        logits = self.actor_net(state)
        return Categorical(logits=logits)

    def calc_policy_loss(self, states, actions, rewards):
        assert (torch.is_tensor(states) and torch.is_tensor(actions) and torch.is_tensor(rewards)), \
            "states, actions and rewards are not in the right format"
        # The negative sign is for gradient ascent
        loss = -(self.get_logits(states).log_prob(actions)) * rewards
        return loss.mean()

    def ppo_calc_log_prob(self, states, actions):
        obs_tensor = torch.as_tensor(states).float().to(device=Device)
        actions = torch.as_tensor(actions).float().to(device=Device)
        logits = self.get_logits(obs_tensor)
        entropy = logits.entropy()
        log_prob = logits.log_prob(actions)
        return log_prob, entropy

    def get_action(self, state):
        # Run the observation through the actor net and sample an action.
        # Categorical.sample() converts the logits to probabilities and samples from them.
        logits = self.get_logits(state)
        action = logits.sample()
        return action.item()

    # Returns the reward-to-go for each transition in the batch.
    # NOTE: the function header was missing in the original snippet; the name
    # and signature below are reconstructed from how the body uses its inputs.
    def calc_reward_to_go(self, traj_rewards, traj_dones):
        rew_to_go_list = []
        rew_sum = 0
        for rew, done in zip(reversed(traj_rewards), reversed(traj_dones)):
            if done:
                rew_sum = rew
            else:
                rew_sum = rew + rew_sum
            rew_to_go_list.append(rew_sum)
        rew_to_go_list = reversed(rew_to_go_list)
        return list(rew_to_go_list)

    # Returns the concatenated state-action tensor fed to the discriminator.
    # obs_list and actions_list are Python lists.
    def concat_state_action(self, obs_list, actions_list, shuffle=False):
        obs = np.array(obs_list)
        actions_data = np.array(actions_list)
        # Converting the actions to a one-hot encoding
        actions = np.zeros((len(actions_list), self.action_space))
        actions[np.arange(len(actions_list)), actions_data] = 1
        state_action = np.concatenate((obs, actions), axis=1)
        if shuffle:
            np.random.shuffle(state_action)  # Shuffling to break any correlations
        state_action = torch.as_tensor(state_action).float().to(Device)
        return state_action

    # Uses the discriminator and critic networks to calculate the advantage of
    # each state-action pair, and the targets for the critic network
    def calc_gae_targets(self):
        obs_tensor = torch.as_tensor(self.traj_obs).float().to(Device)
        action_tensor = torch.as_tensor(self.traj_actions).float().to(Device)
        state_action = self.concat_state_action(self.traj_obs, self.traj_actions)

        # How well we have fooled the discriminator, which plays the role of
        # the reward at each time step
        disc_rewards = -torch.log(self.discriminator(state_action))
        disc_rewards = disc_rewards.view(-1).tolist()
        traj_state_values = self.critic_net(obs_tensor).view(-1).tolist()

        gae = []
        targets = []
        for i, val, next_val, reward, done in \
                zip(range(len(self.traj_dones)),
                    reversed(traj_state_values),
                    reversed(traj_state_values[1:] + [None]),
                    reversed(disc_rewards),
                    reversed(self.traj_dones)):
            # The last trajectory may be cut short by the max_steps limit,
            # so the last done is not always True
            if done or i == 0:
                delta = reward - val
                last_gae = delta
            else:
                delta = reward + self.gamma * next_val - val
                last_gae = delta + self.gamma * self.gae_lambda * last_gae
            gae.append(last_gae)
            targets.append(last_gae + val)
        return list(reversed(gae)), list(reversed(targets))

    # Uses num_transitions samples from the expert trajectories, and all policy
    # trajectory transitions, to train the discriminator.
    def update_discriminator(self, num_transitions):
        # data holds the expert trajectories used to train the discriminator;
        # it is a dictionary with keys 'obs', 'acts', 'rews', 'dones'
        data = np.load(self.expert_traj_fpath)
        # TODO: loading the arrays this way blocks a lot of memory; streaming
        # them through a generator would be much more efficient
        obs = data['obs']
        actions = data['acts']

        # Zeroing the discriminator gradient
        self.discriminator_optim.zero_grad()
        loss = nn.BCELoss()

        # Sampling from the expert trajectories
        random_samples_ind = np.random.choice(len(obs), num_transitions)
        expert_state_action = self.concat_state_action(obs[random_samples_ind], actions[random_samples_ind])

        # Expert loss; the target for expert trajectories is 0
        expert_output = self.discriminator(expert_state_action)
        expert_traj_loss = loss(expert_output, torch.zeros((num_transitions, 1), device=Device))

        # Sampling the policy trajectories
        policy_state_action = self.concat_state_action(self.traj_obs, self.traj_actions, shuffle=True)

        # Policy trajectory loss; the target for policy trajectories is 1
        policy_traj_output = self.discriminator(policy_state_action)
        policy_traj_loss = loss(policy_traj_output, torch.ones((policy_traj_output.shape[0], 1), device=Device))

        # Updating the discriminator
        D_loss = expert_traj_loss + policy_traj_loss
        D_loss.backward()
        self.discriminator_optim.step()

    def train_gail(self):
        assert (len(self.traj_obs) == len(self.traj_actions) == len(self.traj_dones)), \
            "Sizes of the trajectory lists don't match"

        # Use self.num_expert_transitions expert samples and all policy
        # transitions saved in play()
        self.update_discriminator(self.num_expert_transitions)

        # Finding the old log probabilities from the saved states and actions.
        # If the number of transitions is very large, this could also be
        # computed in batches.
        with torch.no_grad():
            old_logprob, _ = self.ppo_calc_log_prob(self.traj_obs, self.traj_actions)
        old_logprob = old_logprob.detach()

        traj_gae, traj_targets = self.calc_gae_targets()

        # Performing the PPO policy updates in batches
        for epoch in range(self.ppo_epochs):
            for batch_offs in range(0, len(self.traj_dones), self.ppo_batch_size):
                batch_obs = self.traj_obs[batch_offs:batch_offs + self.ppo_batch_size]
                batch_actions = self.traj_actions[batch_offs:batch_offs + self.ppo_batch_size]
                batch_gae = traj_gae[batch_offs:batch_offs + self.ppo_batch_size]
                batch_targets = traj_targets[batch_offs:batch_offs + self.ppo_batch_size]
                batch_old_logprob = old_logprob[batch_offs:batch_offs + self.ppo_batch_size]

                # Zero the gradients
                self.actor_optim.zero_grad()
                self.critic_net_optim.zero_grad()

                # Critic loss
                batch_obs_tensor = torch.as_tensor(batch_obs).float().to(Device)
                state_vals = self.critic_net(batch_obs_tensor).view(-1)
                batch_targets = torch.as_tensor(batch_targets).float().to(Device)
                critic_loss = F.mse_loss(state_vals, batch_targets)

                # Policy and entropy loss
                log_prob, entropy = self.ppo_calc_log_prob(batch_obs, batch_actions)
                batch_ratio = torch.exp(log_prob - batch_old_logprob)
                batch_gae = torch.as_tensor(batch_gae).float().to(Device)
                unclipped_objective = batch_ratio * batch_gae
                clipped_objective = torch.clamp(batch_ratio, 1 - self.ppo_eps, 1 + self.ppo_eps) * batch_gae
                policy_loss = -torch.min(clipped_objective, unclipped_objective).mean()
                entropy_loss = -entropy.mean()

                # Performing backprop.
                # Both policy_loss and entropy_loss accumulate gradients in the
                # actor net; with retain_graph, the second backward call adds
                # onto the gradients from the first.
                critic_loss.backward()
                policy_loss.backward(retain_graph=True)
                entropy_loss.backward()

                # Updating the networks
                self.actor_optim.step()
                self.critic_net_optim.step()

    # The agent plays self.max_eps episodes with the current policy and then
    # trains on that data
    def play(self, rendering):
        self.clear_lists()
        saved_transitions = 0
        for ep in range(self.max_eps):
            obs = self.env.reset()
            ep_reward = 0
            for step in range(self.max_steps):
                if rendering:
                    self.env.render()
                self.traj_obs.append(obs)
                obs = torch.from_numpy(obs).float().to(device=Device)
                # get_action() runs obs through the actor network and samples an action
                action = self.get_action(obs)
                # The environment reward is saved here, but it is not used to
                # optimize the policy or the discriminator; it only tracks progress.
                obs, rew, done, info = self.env.step(action)
                ep_reward += rew
                self.traj_actions.append(action)
                self.traj_rewards.append(rew)
                saved_transitions += 1
                if done:
                    # The last observation is not saved, since it is essentially a
                    # dead state. This keeps the obs, action, reward and done lists
                    # the same length.
                    self.traj_dones.append(done)
                    self.stats['ep_rew'].append(ep_reward)
                    self.stats['episode'] += 1
                    break
                else:
                    self.traj_dones.append(done)
            print('Episode over. Reward: ', ep_reward)
        self.train_gail()

    def run(self, model_path, policy_iterations=65, show_renders_every=20, renders=True):
        for i in range(policy_iterations):
            if i % show_renders_every == 0:
                self.play(rendering=renders)
            else:
                self.play(rendering=False)
            print(f"Policy updated {i} times")
        torch.save(self.actor_net.state_dict(), model_path)
        print('Model saved at:', model_path)

    def plot_rewards(self, avg_over=10):
        graph_x = np.arange(self.stats['episode'])
        graph_y = np.array(self.stats['ep_rew'])
        assert (len(graph_x) == len(graph_y)), "Plot axes do not match"
        # np.array_split needs an integer number of sections
        graph_x_averaged = [mean(arr) for arr in np.array_split(graph_x, len(graph_x) // avg_over)]
        graph_y_averaged = [mean(arr) for arr in np.array_split(graph_y, len(graph_y) // avg_over)]
        plt.plot(graph_x_averaged, graph_y_averaged)
        plt.savefig(self.save_rewards_fig)
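# Minimal usage sketch for the agent above (not part of the original file):
# the iteration counts are illustrative only, and the save path reuses the
# attribute defined in __init__.
if __name__ == '__main__':
    agent = PGAgent()
    agent.run(model_path=agent.save_policy_fpath, policy_iterations=65,
              show_renders_every=20, renders=False)
    agent.plot_rewards(avg_over=agent.avg_over)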
# Assumes Generator, Discriminator and ChoppedVGG19 are defined elsewhere in
# the project, along with the usual imports (torch, torch.nn as nn, tqdm).
def adversarial_train(img_data_loader, vgg_cutoff_layer=36, num_epochs=2000, decay_factor=0.1,
                      initial_lr=0.0001, adversarial_loss_weight=0.001, checkpoint=None, save=True):
    if checkpoint is not None:
        # Resume from a saved checkpoint. The original snippet rebuilt the
        # discriminator and optimizers and reset the epoch counter here, even
        # though the saved dictionary (see the torch.save call below) contains
        # all of them, so they are restored instead.
        imported_checkpoint = torch.load(checkpoint)
        generator = imported_checkpoint['generator']
        discriminator = imported_checkpoint['discriminator']
        generator_optimizer = imported_checkpoint['generator_optimizer']
        discriminator_optimizer = imported_checkpoint['discriminator_optimizer']
        starting_epoch = imported_checkpoint['epoch'] + 1
    else:
        generator = Generator()
        discriminator = Discriminator()
        starting_epoch = 0
        generator_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, generator.parameters()), lr=initial_lr)
        discriminator_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, discriminator.parameters()), lr=initial_lr)

    vgg = ChoppedVGG19(vgg_cutoff_layer)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Push everything to the GPU if it's available
    content_criterion = nn.MSELoss().to(device)
    adversarial_criterion = nn.BCEWithLogitsLoss().to(device)
    generator.to(device)
    discriminator.to(device)
    vgg.to(device)

    for epoch in range(starting_epoch, num_epochs):
        running_perceptual_loss = 0.0
        running_adversarial_loss = 0.0
        for ii, (hr_imgs, lr_imgs) in enumerate(tqdm(img_data_loader)):
            hr_imgs, lr_imgs = hr_imgs.to(device), lr_imgs.to(device)

            # Forward-propagate through the generator
            sr_imgs = generator(lr_imgs)
            sr_vgg_feature_maps = vgg(sr_imgs)
            hr_vgg_feature_maps = vgg(hr_imgs).detach()

            # Try to discriminate the fakes
            sr_discriminator_logprob = discriminator(sr_imgs)

            # Generator loss: VGG content loss plus weighted adversarial loss
            content_loss = content_criterion(sr_vgg_feature_maps, hr_vgg_feature_maps)
            adversarial_loss = adversarial_criterion(sr_discriminator_logprob,
                                                     torch.ones_like(sr_discriminator_logprob))
            perceptual_loss = content_loss + adversarial_loss_weight * adversarial_loss
            running_perceptual_loss += perceptual_loss.item()
            del sr_vgg_feature_maps, hr_vgg_feature_maps, sr_discriminator_logprob

            # Backpropagate and update the generator
            generator_optimizer.zero_grad()
            perceptual_loss.backward()
            generator_optimizer.step()

            # Now the discriminator
            sr_discriminator_logprob = discriminator(sr_imgs.detach())
            hr_discriminator_logprob = discriminator(hr_imgs)
            adversarial_loss = adversarial_criterion(sr_discriminator_logprob, torch.zeros_like(sr_discriminator_logprob)) \
                + adversarial_criterion(hr_discriminator_logprob, torch.ones_like(hr_discriminator_logprob))
            running_adversarial_loss += adversarial_loss.item()

            # Backpropagate and update the discriminator
            discriminator_optimizer.zero_grad()
            adversarial_loss.backward()
            discriminator_optimizer.step()

            del lr_imgs, hr_imgs, sr_imgs, sr_discriminator_logprob, hr_discriminator_logprob

        print("Epoch number {}".format(epoch))
        print("Average Perceptual Loss: {}".format(running_perceptual_loss / len(img_data_loader)))
        print("Average Adversarial Loss: {}".format(running_adversarial_loss / len(img_data_loader)))

        if save:
            # Save a checkpoint so training can be continued later
            torch.save({'epoch': epoch,
                        'generator': generator,
                        'generator_optimizer': generator_optimizer,
                        'discriminator': discriminator,
                        'discriminator_optimizer': discriminator_optimizer},
                       'adversarial_training_checkpoint_CelebA_HQ.pth.tar')
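# Hedged usage sketch (not from the original file): 'SRDataset' is a
# hypothetical dataset that returns (hr_img, lr_img) pairs, matching how the
# training loop above unpacks each batch; the path and batch size are placeholders.
from torch.utils.data import DataLoader

train_loader = DataLoader(SRDataset('data/celeba_hq'), batch_size=16, shuffle=True)
adversarial_train(train_loader, num_epochs=10, checkpoint=None, save=True)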
# Assumes the usual imports (os, numpy as np, torch, torch.optim as optim,
# collections.deque, tensorboard's SummaryWriter) plus the project-specific
# pieces: DialogEnvironment, Actor, Critic, Discriminator, get_action,
# get_reward, get_cosine_sim, get_raw_action, train_discrim, train_actor_critic,
# save_checkpoint, and the module-level `args` and `device`.
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1]  # model name

    torch.manual_seed(args.seed)  # TODO

    actor = Actor(hidden_size=args.hidden_size, num_layers=args.num_layers, device='cuda',
                  input_size=args.input_size, output_size=args.input_size)
    critic = Critic(hidden_size=args.hidden_size, num_layers=args.num_layers,
                    input_size=args.input_size, seq_len=args.seq_len)
    # NOTE: the original passed num_layers=args.hidden_size here, which looks
    # like a typo; num_layers=args.num_layers matches the other networks.
    discrim = Discriminator(hidden_size=args.hidden_size, num_layers=args.num_layers,
                            input_size=args.input_size, seq_len=args.seq_len)

    actor.to(device), critic.to(device), discrim.to(device)

    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)
    critic_optim = optim.Adam(critic.parameters(), lr=args.learning_rate, weight_decay=args.l2_rate)
    discrim_optim = optim.Adam(discrim.parameters(), lr=args.learning_rate)

    # load demonstrations
    writer = SummaryWriter(args.logdir)

    if args.load_model is not None:  # TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)
        actor.load_state_dict(ckpt['actor'])
        critic.load_state_dict(ckpt['critic'])
        discrim.load_state_dict(ckpt['discrim'])

    episodes = 0
    train_discrim_flag = True

    for iter in range(args.max_iter_num):
        actor.eval(), critic.eval()
        memory = deque()

        steps = 0
        # Collected over all episodes sampled in this iteration
        scores = []
        similarity_scores = []

        while steps < args.total_sample_size:
            state, expert_action, raw_state, raw_expert_action = env.reset()
            score = 0
            similarity_score = 0

            state = state[:args.seq_len, :]
            expert_action = expert_action[:args.seq_len, :]
            state = state.to(device)
            expert_action = expert_action.to(device)

            for _ in range(10000):
                steps += 1

                mu, std = actor(state.resize(1, args.seq_len, args.input_size))  # TODO: there has to be a better way to resize
                action = get_action(mu.cpu(), std.cpu())[0]
                for i in range(5):
                    emb_sum = expert_action[i, :].sum().cpu().item()
                    if emb_sum == 0:
                        action[i:, :] = 0  # manual padding
                        break

                done = env.step(action)
                irl_reward = get_reward(discrim, state, action, args)
                mask = 0 if done else 1

                memory.append([state, torch.from_numpy(action).to(device), irl_reward, mask, expert_action])
                score += irl_reward
                similarity_score += get_cosine_sim(expert=expert_action, action=action.squeeze(), seq_len=5)

                if done:
                    break

            episodes += 1
            scores.append(score)
            similarity_scores.append(similarity_score)

        score_avg = np.mean(scores)
        similarity_score_avg = np.mean(similarity_scores)
        print('{}:: {} episode score is {:.2f}'.format(iter, episodes, score_avg))
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        actor.train(), critic.train(), discrim.train()

        if train_discrim_flag:
            expert_acc, learner_acc = train_discrim(discrim, memory, discrim_optim, args)
            print("Expert: %.2f%% | Learner: %.2f%%" % (expert_acc * 100, learner_acc * 100))
            writer.add_scalar('log/expert_acc', float(expert_acc), iter)
            writer.add_scalar('log/learner_acc', float(learner_acc), iter)
            writer.add_scalar('log/avg_acc', float(learner_acc + expert_acc) / 2, iter)
            # Only check the thresholds if they were given
            if args.suspend_accu_exp is not None:
                if expert_acc > args.suspend_accu_exp and learner_acc > args.suspend_accu_gen:
                    train_discrim_flag = False

        train_actor_critic(actor, critic, memory, actor_optim, critic_optim, args)

        writer.add_scalar('log/score', float(score_avg), iter)
        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0], iter)
        raw_action = get_raw_action(action)  # TODO
        writer.add_text('log/raw_action', raw_action, iter)
        writer.add_text('log/raw_expert_action', raw_expert_action, iter)

        # NOTE: the original condition was `if iter % 100:`, which fires on every
        # iteration except multiples of 100; checkpointing every 100 iterations
        # appears to be the intent.
        if iter % 100 == 0:
            score_avg = int(score_avg)

            # Append the sampled dialog to a per-experiment results file
            with open(experiment_name + '.txt', 'a') as file_object:
                result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
                file_object.write(result_str)

            model_path = os.path.join(os.getcwd(), 'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path, experiment_name + '_ckpt_' + str(score_avg) + '.pth.tar')

            save_checkpoint({
                'actor': actor.state_dict(),
                'critic': critic.state_dict(),
                'discrim': discrim.state_dict(),
                'args': args,
                'score': score_avg,
            }, filename=ckpt_path)
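# Typical entry point for the script above (sketch): `args` is assumed to be an
# argparse.Namespace built at module level, e.g. args = parser.parse_args().
if __name__ == "__main__":
    main()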
# Assumes Generator, Discriminator and the weight-init function init_func are
# defined elsewhere, along with the usual imports (torch, torch.nn as nn,
# torchvision, torchvision.transforms as transforms, numpy as np,
# matplotlib.pyplot as plt).
class Trainer():
    def __init__(self, config):
        self.batch_size = config.batchSize
        self.epochs = config.epochs
        self.use_cycle_loss = config.cycleLoss
        self.cycle_multiplier = config.cycleMultiplier
        self.use_identity_loss = config.identityLoss
        self.identity_multiplier = config.identityMultiplier
        self.load_models = config.loadModels
        self.data_x_loc = config.dataX
        self.data_y_loc = config.dataY
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # These are read inside init_models()/init_data_loaders(), so set them first
        self.output_path = "./outputs/"
        self.img_width = 256
        self.img_height = 256

        self.init_models()
        self.init_data_loaders()

        self.g_optimizer = torch.optim.Adam(list(self.G_X.parameters()) + list(self.G_Y.parameters()), lr=config.lr)
        self.d_optimizer = torch.optim.Adam(list(self.D_X.parameters()) + list(self.D_Y.parameters()), lr=config.lr)
        self.scheduler_g = torch.optim.lr_scheduler.StepLR(self.g_optimizer, step_size=1, gamma=0.95)

    # Load/construct the models
    def init_models(self):
        self.G_X = Generator(3, 3, nn.InstanceNorm2d)
        self.D_X = Discriminator(3)
        self.G_Y = Generator(3, 3, nn.InstanceNorm2d)
        self.D_Y = Discriminator(3)
        if self.load_models:
            self.G_X.load_state_dict(torch.load(self.output_path + "models/G_X", map_location='cpu'))
            self.G_Y.load_state_dict(torch.load(self.output_path + "models/G_Y", map_location='cpu'))
            self.D_X.load_state_dict(torch.load(self.output_path + "models/D_X", map_location='cpu'))
            self.D_Y.load_state_dict(torch.load(self.output_path + "models/D_Y", map_location='cpu'))
        else:
            self.G_X.apply(init_func)
            self.G_Y.apply(init_func)
            self.D_X.apply(init_func)
            self.D_Y.apply(init_func)
        self.G_X.to(self.device)
        self.G_Y.to(self.device)
        self.D_X.to(self.device)
        self.D_Y.to(self.device)

    # Initialize the data loaders and the image transforms
    def init_data_loaders(self):
        transform = transforms.Compose([
            transforms.Resize((self.img_width, self.img_height)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        X_folder = torchvision.datasets.ImageFolder(self.data_x_loc, transform)
        self.X_loader = torch.utils.data.DataLoader(X_folder, batch_size=self.batch_size, shuffle=True)
        Y_folder = torchvision.datasets.ImageFolder(self.data_y_loc, transform)
        self.Y_loader = torch.utils.data.DataLoader(Y_folder, batch_size=self.batch_size, shuffle=True)

    def save_models(self):
        torch.save(self.G_X.state_dict(), self.output_path + "models/G_X")
        torch.save(self.D_X.state_dict(), self.output_path + "models/D_X")
        torch.save(self.G_Y.state_dict(), self.output_path + "models/G_Y")
        torch.save(self.D_Y.state_dict(), self.output_path + "models/D_Y")

    # Reset the gradients of all models; needed between every training step
    def reset_gradients(self):
        self.g_optimizer.zero_grad()
        self.d_optimizer.zero_grad()

    # Sample images from the training data every few epochs and save them for inspection
    def save_samples(self, epoch):
        x_iter = iter(self.X_loader)
        y_iter = iter(self.Y_loader)
        img_data_x, _ = next(x_iter)
        img_data_y, _ = next(y_iter)

        original_x = np.array(img_data_x[0])
        generated_y = np.array(
            self.G_Y(img_data_x[0].view(1, 3, self.img_width, self.img_height).to(self.device)).cpu().detach())[0]
        original_y = np.array(img_data_y[0])
        generated_x = np.array(
            self.G_X(img_data_y[0].view(1, 3, self.img_width, self.img_height).to(self.device)).cpu().detach())[0]

        def prepare_image(img):
            # CHW -> HWC and un-normalize from [-1, 1] back to [0, 1]
            img = img.transpose((1, 2, 0))
            return img / 2 + 0.5

        original_x = prepare_image(original_x)
        generated_y = prepare_image(generated_y)
        original_y = prepare_image(original_y)
        generated_x = prepare_image(generated_x)

        plt.imsave('./outputs/samples/original_X_{}.png'.format(epoch), original_x)
        plt.imsave('./outputs/samples/original_Y_{}.png'.format(epoch), original_y)
        plt.imsave('./outputs/samples/generated_X_{}.png'.format(epoch), generated_x)
        plt.imsave('./outputs/samples/generated_Y_{}.png'.format(epoch), generated_y)

    # Training loop
    def train(self):
        D_X_losses = []
        D_Y_losses = []
        G_X_losses = []
        G_Y_losses = []
        for epoch in range(self.epochs):
            print("======")
            print("Epoch {}!".format(epoch + 1))

            # Track progress
            if epoch % 5 == 0:
                self.save_samples(epoch)

            # The paper reduces the learning rate after 100 epochs
            if epoch > 100:
                self.scheduler_g.step()

            for (data_X, _), (data_Y, _) in zip(self.X_loader, self.Y_loader):
                data_X = data_X.to(self.device)
                data_Y = data_Y.to(self.device)

                # =====================================
                # Train discriminators
                # =====================================
                # Train on fake X
                self.reset_gradients()
                fake_X = self.G_X(data_Y)
                out_fake_X = self.D_X(fake_X)
                d_x_f_loss = torch.mean(out_fake_X**2)
                d_x_f_loss.backward()
                self.d_optimizer.step()

                # Train on fake Y
                self.reset_gradients()
                fake_Y = self.G_Y(data_X)
                out_fake_Y = self.D_Y(fake_Y)
                d_y_f_loss = torch.mean(out_fake_Y**2)
                d_y_f_loss.backward()
                self.d_optimizer.step()

                # Train on real X
                self.reset_gradients()
                out_true_X = self.D_X(data_X)
                d_x_t_loss = torch.mean((out_true_X - 1)**2)
                d_x_t_loss.backward()
                self.d_optimizer.step()

                # Train on real Y
                self.reset_gradients()
                out_true_Y = self.D_Y(data_Y)
                d_y_t_loss = torch.mean((out_true_Y - 1)**2)
                d_y_t_loss.backward()
                self.d_optimizer.step()

                D_X_losses.append([d_x_t_loss.cpu().detach().numpy(), d_x_f_loss.cpu().detach().numpy()])
                D_Y_losses.append([d_y_t_loss.cpu().detach().numpy(), d_y_f_loss.cpu().detach().numpy()])

                # =====================================
                # Train generators
                # =====================================
                # Cycle X -> Y -> X
                self.reset_gradients()
                fake_Y = self.G_Y(data_X)
                out_fake_Y = self.D_Y(fake_Y)
                g_loss1 = torch.mean((out_fake_Y - 1)**2)
                # Default to the adversarial term so the update also runs when
                # cycle loss is disabled
                g_loss = g_loss1
                if self.use_cycle_loss:
                    reconst_X = self.G_X(fake_Y)
                    g_loss2 = self.cycle_multiplier * torch.mean((data_X - reconst_X)**2)
                    G_Y_losses.append([g_loss1.cpu().detach().numpy(), g_loss2.cpu().detach().numpy()])
                    g_loss = g_loss1 + g_loss2
                g_loss.backward()
                self.g_optimizer.step()

                # Cycle Y -> X -> Y
                self.reset_gradients()
                fake_X = self.G_X(data_Y)
                out_fake_X = self.D_X(fake_X)
                g_loss1 = torch.mean((out_fake_X - 1)**2)
                g_loss = g_loss1
                if self.use_cycle_loss:
                    reconst_Y = self.G_Y(fake_X)
                    g_loss2 = self.cycle_multiplier * torch.mean((data_Y - reconst_Y)**2)
                    G_X_losses.append([g_loss1.cpu().detach().numpy(), g_loss2.cpu().detach().numpy()])
                    g_loss = g_loss1 + g_loss2
                g_loss.backward()
                self.g_optimizer.step()

                # =====================================
                # Train image identity
                # =====================================
                if self.use_identity_loss:
                    self.reset_gradients()
                    # X should stay the same after G_X(X)
                    same_X = self.G_X(data_X)
                    g_loss = self.identity_multiplier * torch.mean((data_X - same_X)**2)
                    g_loss.backward()
                    self.g_optimizer.step()

                    # Y should stay the same after G_Y(Y)
                    # (the original called self.G_X here, which looks like a typo)
                    same_Y = self.G_Y(data_Y)
                    g_loss = self.identity_multiplier * torch.mean((data_Y - same_Y)**2)
                    g_loss.backward()
                    self.g_optimizer.step()

            # Epoch done, save the models
            self.save_models()

        # Save the losses for analysis
        np.save(self.output_path + 'losses/G_X_losses.npy', np.array(G_X_losses))
        np.save(self.output_path + 'losses/G_Y_losses.npy', np.array(G_Y_losses))
        np.save(self.output_path + 'losses/D_X_losses.npy', np.array(D_X_losses))
        np.save(self.output_path + 'losses/D_Y_losses.npy', np.array(D_Y_losses))
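# Usage sketch (not from the original repo): the config fields simply mirror the
# attributes read in __init__; the paths and hyperparameters are placeholders.
from types import SimpleNamespace

config = SimpleNamespace(batchSize=1, epochs=200, cycleLoss=True, cycleMultiplier=10.0,
                         identityLoss=False, identityMultiplier=5.0, loadModels=False,
                         dataX='./data/horses', dataY='./data/zebras', lr=0.0002)
trainer = Trainer(config)
trainer.train()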
def train_ei_adv(self, dataloader, physics, transform, epochs, lr, alpha, ckp_interval,
                 schedule, residual=True, pretrained=None, task='',
                 loss_type='l2', cat=True, report_psnr=False, lr_cos=False):
    save_path = './ckp/{}_ei_adv_{}'.format(get_timestamp(), task)
    os.makedirs(save_path, exist_ok=True)

    generator = UNet(in_channels=self.in_channels, out_channels=self.out_channels,
                     compact=4, residual=residual, circular_padding=True, cat=cat)

    if pretrained:
        checkpoint = torch.load(pretrained)
        generator.load_state_dict(checkpoint['state_dict'])

    discriminator = Discriminator((self.in_channels, self.img_width, self.img_height))

    generator = generator.to(self.device)
    discriminator = discriminator.to(self.device)

    if loss_type == 'l2':
        criterion_mc = torch.nn.MSELoss().to(self.device)
        criterion_ei = torch.nn.MSELoss().to(self.device)
    elif loss_type == 'l1':
        criterion_mc = torch.nn.L1Loss().to(self.device)
        criterion_ei = torch.nn.L1Loss().to(self.device)

    criterion_gan = torch.nn.MSELoss().to(self.device)

    optimizer_G = Adam(generator.parameters(), lr=lr['G'], weight_decay=lr['WD'])
    optimizer_D = Adam(discriminator.parameters(), lr=lr['D'], weight_decay=0)

    if report_psnr:
        log = LOG(save_path, filename='training_loss',
                  field_name=['epoch', 'loss_mc', 'loss_ei', 'loss_g', 'loss_G', 'loss_D', 'psnr', 'mse'])
    else:
        log = LOG(save_path, filename='training_loss',
                  field_name=['epoch', 'loss_mc', 'loss_ei', 'loss_g', 'loss_G', 'loss_D'])

    for epoch in range(epochs):
        adjust_learning_rate(optimizer_G, epoch, lr['G'], lr_cos, epochs, schedule)
        adjust_learning_rate(optimizer_D, epoch, lr['D'], lr_cos, epochs, schedule)

        loss = closure_ei_adv(generator, discriminator, dataloader, physics, transform,
                              optimizer_G, optimizer_D, criterion_mc, criterion_ei,
                              criterion_gan, alpha, self.dtype, self.device, report_psnr)

        log.record(epoch + 1, *loss)

        if report_psnr:
            print('{}\tEpoch[{}/{}]\tfc={:.4e}\tti={:.4e}\tg={:.4e}\tG={:.4e}\tD={:.4e}\tpsnr={:.4f}\tmse={:.4e}'
                  .format(get_timestamp(), epoch, epochs, *loss))
        else:
            print('{}\tEpoch[{}/{}]\tfc={:.4e}\tti={:.4e}\tg={:.4e}\tG={:.4e}\tD={:.4e}'
                  .format(get_timestamp(), epoch, epochs, *loss))

        if epoch % ckp_interval == 0 or epoch + 1 == epochs:
            state = {'epoch': epoch,
                     'state_dict_G': generator.state_dict(),
                     'state_dict_D': discriminator.state_dict(),
                     'optimizer_G': optimizer_G.state_dict(),
                     'optimizer_D': optimizer_D.state_dict()}
            torch.save(state, os.path.join(save_path, 'ckp_{}.pth.tar'.format(epoch)))

    log.close()
# Assumes HourglassNet, Discriminator, GANLoss, IlluminationDataset, the loss
# helpers (image_and_light_loss, feature_loss), check_folder, DataLoader from
# torch.utils.data, and `device` are defined/imported elsewhere in the script.
def train(args):
    # Check whether the results path exists; if not, create the folder
    check_folder(args.results_path)

    # Generator model
    generator = HourglassNet(high_res=args.high_resolution)
    generator.to(device)

    # Discriminator model
    discriminator = Discriminator(input_nc=1)
    discriminator.to(device)

    # Optimizers
    optimizer_g = torch.optim.Adam(generator.parameters())
    optimizer_d = torch.optim.Adam(discriminator.parameters())

    # Training parameters
    feature_weight = 0.5
    skip_count = 0
    use_gan = args.use_gan
    print_frequency = 5

    # Dataloader
    illum_dataset = IlluminationDataset()
    illum_dataloader = DataLoader(illum_dataset, batch_size=args.batch_size)

    # GAN loss based on LSGAN, which uses a squared error
    gan_loss = GANLoss(gan_mode='lsgan')

    # Training
    for epoch in range(1, args.epochs + 1):
        for data_idx, data in enumerate(illum_dataloader):
            source_img, source_light, target_img, target_light = data
            # Tensor.to() is not in-place, so the results have to be reassigned
            # (the original called .to(device) without keeping the result)
            source_img = source_img.to(device)
            source_light = source_light.to(device)
            target_img = target_img.to(device)
            target_light = target_light.to(device)

            optimizer_g.zero_grad()

            # Keep the skip connections if they are required for training;
            # otherwise skip them based on the training scheme for
            # low-res/high-res images
            if args.use_skip:
                skip_count = 0
            else:
                skip_count = 5 if args.high_resolution else 4

            output = generator(source_img, target_light, skip_count, target_img)
            source_face_feats, source_light_pred, target_face_feats, source_relit_pred = output

            img_loss = image_and_light_loss(source_relit_pred, target_img, source_light_pred, target_light)
            feat_loss = feature_loss(source_face_feats, target_face_feats)

            # Generator adversarial loss, if the GAN loss is used
            if use_gan:
                g_loss = gan_loss(discriminator(source_relit_pred), target_is_real=True)
            else:
                # Placed on `device` so the sum below stays on one device
                g_loss = torch.tensor(0.0, device=device)

            total_g_loss = img_loss + g_loss + (feature_weight * feat_loss)
            total_g_loss.backward()
            optimizer_g.step()

            # Training the discriminator
            if use_gan:
                optimizer_d.zero_grad()
                pred_real = discriminator(target_img)
                pred_fake = discriminator(source_relit_pred.detach())
                loss_real = gan_loss(pred_real, target_is_real=True)
                loss_fake = gan_loss(pred_fake, target_is_real=False)
                d_loss = (loss_real + loss_fake) * 0.5
                d_loss.backward()
                optimizer_d.step()
            else:
                loss_real = torch.tensor(0.0)
                loss_fake = torch.tensor(0.0)

            if data_idx % print_frequency == 0:
                print("Epoch: [{}]/[{}], Iteration: [{}]/[{}], image loss: {}, feature loss: {}, "
                      "gen fake loss: {}, dis real loss: {}, dis fake loss: {}"
                      .format(epoch, args.epochs, data_idx + 1, len(illum_dataloader),
                              img_loss.item(), feat_loss.item(), g_loss.item(),
                              loss_real.item(), loss_fake.item()))

        # Saving the model after every epoch
        checkpoint_path = os.path.join(args.results_path, 'checkpoint_epoch_{}.pth'.format(epoch))
        checkpoint = {
            'generator': generator.state_dict(),
            'discriminator': discriminator.state_dict(),
            'optimizer_g': optimizer_g.state_dict(),
            'optimizer_d': optimizer_d.state_dict()
        }
        torch.save(checkpoint, checkpoint_path)
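# Hypothetical argument wiring for train() above: the flag names simply mirror
# the attributes the function reads (results_path, high_resolution, use_gan,
# use_skip, batch_size, epochs), and the defaults are placeholders.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--results_path', default='./results')
    parser.add_argument('--high_resolution', action='store_true')
    parser.add_argument('--use_gan', action='store_true')
    parser.add_argument('--use_skip', action='store_true')
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=10)
    train(parser.parse_args())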