class PGAgent():
    def __init__(self):
        
        self.env = gym.make('CartPole-v1')
        self.expert_traj_fpath = os.path.join('GAIL', 'expert_trajectories', 'expert_traj_test.npz')
        self.save_policy_fpath = os.path.join('GAIL','gail_actor1.pt')
        self.save_rewards_fig = os.path.join('GAIL', 'gail_rewards.png')
        # CartPole-v1 has a 4-dimensional observation space and 2 discrete actions;
        # these sizes are used to build the actor, critic and discriminator networks
        self.state_space = 4
        self.action_space = 2

        self.actor_net = ActorNet(self.state_space, self.action_space)
        self.actor_net.to(device=Device)
        self.actor_optim = torch.optim.Adam(self.actor_net.parameters(), lr = 0.0001)

        self.critic_net = CriticNet(self.state_space)
        self.critic_net.to(Device)
        self.critic_net_optim = torch.optim.Adam(self.critic_net.parameters(), lr = 0.0001)

        self.discriminator = Discriminator(self.state_space, self.action_space)
        self.discriminator.to(Device)
        self.discriminator_optim = torch.optim.Adam(self.discriminator.parameters(), lr = 0.0001)

        # Storing all the values used to calculate the model losses
        self.traj_obs = []
        self.traj_actions = []
        self.traj_rewards = []
        self.traj_dones = []
        self.traj_logprobs = []
        self.traj_logits = []
        self.traj_state_values = []

        # Discount factor
        self.gamma = 0.95
        # Bias-variance tradeoff for GAE (a higher lambda gives higher variance, lower bias)
        self.gae_lambda = 0.95

        # These two will be used during the training of the policy
        self.ppo_batch_size = 500
        self.ppo_epochs = 12
        self.ppo_eps = 0.2

        # Discriminator
        self.num_expert_transitions = 150

        # These will be used for the agent to play using the current policy
        self.max_eps = 5
        # CartPole-v1 already truncates episodes at 500 steps, so this cap is rarely the binding limit
        self.max_steps = 800

        # documenting the stats
        self.avg_over = 5 # episodes
        self.stats = {'episode': 0, 'ep_rew': []}

    def clear_lists(self):
        self.traj_obs = []
        self.traj_actions = []
        self.traj_rewards = []
        self.traj_dones = []
        self.traj_logprobs = []
        self.traj_logits = []
        self.traj_state_values = []

    # Returns a torch Categorical distribution, which makes it easy to compute log_prob, probs, and to sample actions
    def get_logits(self, state):
        logits = self.actor_net(state)
        return Categorical(logits=logits)

    def calc_policy_loss(self, states, actions, rewards):
        assert (torch.is_tensor(states) and torch.is_tensor(actions) and torch.is_tensor(rewards)),\
             "states, actions and rewards must be tensors"

        # The negative sign is for gradient ascent
        loss = -(self.get_logits(states).log_prob(actions))*rewards
        return loss.mean()

    def ppo_calc_log_prob(self, states, actions):
        obs_tensor = torch.as_tensor(states).float().to(device=Device)
        actions = torch.as_tensor(actions).float().to(device=Device)
        logits = self.get_logits(obs_tensor)
        entropy = logits.entropy()
        log_prob = logits.log_prob(actions)

        return log_prob, entropy

    def get_action(self, state):
        
        # Run the observation through the actor network to get a distribution over actions
        logits = self.get_logits(state)

        # Categorical.sample() draws an action according to the softmax probabilities of the logits
        action = logits.sample()

        return action.item()


    # Computes the (undiscounted) reward-to-go for each transition in the batch.
    # NOTE: the method name below is an assumption reconstructed from the body.
    def calc_rew_to_go(self, traj_rewards, traj_dones):
        rew_to_go_list = []
        rew_sum = 0
        for rew, done in zip(reversed(traj_rewards), reversed(traj_dones)):
            if done:
                rew_sum = rew
                rew_to_go_list.append(rew_sum)
            else:
                rew_sum = rew + rew_sum
                rew_to_go_list.append(rew_sum)

        rew_to_go_list = reversed(rew_to_go_list)
        return list(rew_to_go_list)

    # This returns the concatenated state_action tensor used to input to discriminator
    # obs and actions are a list
    def concat_state_action(self, obs_list, actions_list, shuffle=False):
        obs = np.array(obs_list)
        actions_data = np.array(actions_list)
        actions = np.zeros((len(actions_list), self.action_space))
        actions[np.arange(len(actions_list)), actions_data] = 1  # Converting to one hot encoding

        state_action = np.concatenate((obs, actions), axis=1)
        if shuffle:
            np.random.shuffle(state_action)  # Shuffling to break any correlations

        state_action = torch.as_tensor(state_action).float().to(Device)

        return state_action

    # This uses the discriminator and critic networks to calculate the advantage 
    # of each state action pair, and the targets for the critic network
    def calc_gae_targets(self):
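        # Generalized Advantage Estimation (GAE), computed backwards in time:
        #   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
        #   A_t     = delta_t + gamma * lambda * A_{t+1}
        # The critic targets are then A_t + V(s_t).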

        obs_tensor = torch.as_tensor(self.traj_obs).float().to(Device)
        action_tensor = torch.as_tensor(self.traj_actions).float().to(Device)
        state_action = self.concat_state_action(self.traj_obs, self.traj_actions)

        # This calculates how well we have fooled the discriminator, which is equivalent to the 
        # reward at each time step
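        # Since the discriminator is trained with target 0 for expert pairs and 1 for
        # policy pairs, a small D(s, a) means the pair looks expert-like, so
        # -log(D(s, a)) grows as the policy imitates the expert better.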
        disc_rewards = -torch.log(self.discriminator(state_action))
        disc_rewards = disc_rewards.view(-1).tolist()

        traj_state_values = self.critic_net(obs_tensor).view(-1).tolist()
        gae = []
        targets = []

        for i, (val, next_val, reward, done) in enumerate(zip(
                reversed(traj_state_values),
                reversed(traj_state_values[1:] + [None]),
                reversed(disc_rewards),
                reversed(self.traj_dones))):

            # The last trajectory may be cut short by the max_steps limit, so the final
            # done flag is not always True; i==0 (the final transition, which has no
            # next state value) is therefore also treated as terminal
            if done or i==0: 
                delta = reward - val
                last_gae = delta
            else:
                delta = reward + self.gamma*next_val - val
                last_gae = delta + self.gamma*self.gae_lambda*last_gae
            
            gae.append(last_gae)
            targets.append(last_gae + val)

        return list(reversed(gae)), list(reversed(targets))

    # We use num_transitions samples from the expert trajectories,
    # and all of the policy's stored transitions, to train the discriminator.
    def update_discriminator(self, num_transitions):

        # data stores the expert trajectories used to train the discriminator
        # data is a dictionary with keys: 'obs', 'acts', 'rews', 'dones'
        data = np.load(self.expert_traj_fpath)
        # TODO: Loading the full array this way holds a lot of memory. It would be much more
        # efficient to stream the numpy array with a generator.
        obs = data['obs']
        actions = data['acts']

        # Zeroing Discriminator gradient
        self.discriminator_optim.zero_grad()
        loss = nn.BCELoss()

        ## Sampling from expert trajectories. 
        random_samples_ind = np.random.choice(len(obs), num_transitions) 
        expert_state_action = self.concat_state_action(obs[random_samples_ind], actions[random_samples_ind])

        ## Expert loss: the discriminator target for expert transitions is 0
        expert_output = self.discriminator(expert_state_action)
        expert_traj_loss = loss(expert_output, torch.zeros((num_transitions, 1), device=Device))
        
        ## Sampling policy trajectories
        policy_state_action = self.concat_state_action(self.traj_obs, self.traj_actions, shuffle=True)

        ## Policy trajectory loss: the discriminator target for policy transitions is 1
        policy_traj_output = self.discriminator(policy_state_action)
        policy_traj_loss = loss(policy_traj_output, torch.ones((policy_traj_output.shape[0], 1), device=Device))

        # Updating the Discriminator
        D_loss = expert_traj_loss + policy_traj_loss
        D_loss.backward()
        self.discriminator_optim.step()

    def train_gail(self):

        assert (len(self.traj_obs)==len(self.traj_actions)==len(self.traj_dones)), "Size of traj lists don't match"

        # We use self.num_expert_transitions and all saved policy transitions from play()
        self.update_discriminator(self.num_expert_transitions)
        
        # Finding the old log prob of each stored action from traj_obs and traj_actions.
        # If the number of transitions is too large, this could also be broken into batches.
        # Computed under no_grad, so these values act as constants in the PPO ratio below.
        with torch.no_grad():
            old_logprob, _ = self.ppo_calc_log_prob(self.traj_obs, self.traj_actions)

            traj_gae, traj_targets = self.calc_gae_targets()

        # Performing ppo policy updates in batches
        for epoch in range(self.ppo_epochs):
            for batch_offs in range(0, len(self.traj_dones), self.ppo_batch_size):
                batch_obs = self.traj_obs[batch_offs:batch_offs + self.ppo_batch_size]
                batch_actions = self.traj_actions[batch_offs:batch_offs + self.ppo_batch_size]
                batch_gae = traj_gae[batch_offs:batch_offs + self.ppo_batch_size]
                batch_targets = traj_targets[batch_offs:batch_offs + self.ppo_batch_size]
                batch_old_logprob = old_logprob[batch_offs:batch_offs + self.ppo_batch_size]

                # Zero the gradients
                self.actor_optim.zero_grad()
                self.critic_net_optim.zero_grad()

                # Critic Loss
                batch_obs_tensor = torch.as_tensor(batch_obs).float().to(Device)
                state_vals = self.critic_net(batch_obs_tensor).view(-1)
                batch_targets = torch.as_tensor(batch_targets).float().to(Device)
                critic_loss = F.mse_loss(state_vals, batch_targets)
                
                # Policy and Entropy Loss
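                # PPO clipped surrogate objective (maximized, hence the leading minus sign below):
                #   r_t(theta) = pi_theta(a_t | s_t) / pi_theta_old(a_t | s_t)
                #   L_clip = E[ min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t) ]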
                log_prob, entropy = self.ppo_calc_log_prob(batch_obs, batch_actions)
                batch_ratio = torch.exp(log_prob - batch_old_logprob)
                batch_gae = torch.as_tensor(batch_gae).float().to(Device)
                unclipped_objective = batch_ratio * batch_gae
                clipped_objective = torch.clamp(batch_ratio, 1 - self.ppo_eps, 1 + self.ppo_eps) * batch_gae
                policy_loss = -torch.min(clipped_objective, unclipped_objective).mean()
                entropy_loss = -entropy.mean()

                # Performing backprop
                critic_loss.backward()
                # Both policy_loss and entropy_loss backprop through the actor net.
                # retain_graph=True keeps the graph alive so the second backward call can run;
                # its gradients accumulate onto the ones from the first call.
                policy_loss.backward(retain_graph=True)
                entropy_loss.backward()

                # print('Losses: ', (critic_loss.shape, policy_loss.shape, entropy_loss.shape))
                # print('critic grad values: ', self.critic_net.fc1.weight.grad)
                # print('actor grad values: ', self.actor_net.fc1.weight.grad)

                # Updating the networks
                self.actor_optim.step()
                self.critic_net_optim.step()

    # The agent will play self.max_eps episodes using the current policy, and train on that data
    def play(self, rendering):
        
        self.clear_lists()
        saved_transitions = 0
        for ep in range(self.max_eps):
            obs = self.env.reset()
            ep_reward = 0

            for step in range(self.max_steps):
                
                if rendering==True:
                    self.env.render()

                self.traj_obs.append(obs)
                obs = torch.from_numpy(obs).float().to(device=Device)
                
                # get_action() will run obs through actor network and find the action to take
                action = self.get_action(obs)
                
                # We are saving the reward here, but this will not be used in the optimization of the policy
                # or discriminator, it is only used to track our progress.
                obs, rew, done, info = self.env.step(action)
                ep_reward += rew

                self.traj_actions.append(action)
                self.traj_rewards.append(rew)

                saved_transitions += 1

                if done:
                    # We do not save the terminal observation, since it is essentially a dead state.
                    # This keeps the obs, action, reward and done lists the same length.
                    self.traj_dones.append(done)
                    self.stats['ep_rew'].append(ep_reward)
                    self.stats['episode'] += 1
                    break

                else:
                    self.traj_dones.append(done)
            # print(f" {ep} episodes over.", end='\r')
            print('episode over. Reward: ', ep_reward)

            
        self.train_gail()


    def run(self, model_path, policy_iterations = 65, show_renders_every = 20, renders = True):
        for i in range(policy_iterations):
            if i%show_renders_every==0:
                self.play(rendering=renders)
            else:
                self.play(rendering=False)
            print(f" Policy updated {i} times")
        
        torch.save(self.actor_net.state_dict(), model_path)
        print('model saved at: ', model_path)

    def plot_rewards(self, avg_over=10):       
        graph_x = np.arange(self.stats['episode'])
        graph_y = np.array(self.stats['ep_rew'])

        assert (len(graph_x) == len(graph_y)), "Plot axes do not match"

        graph_x_averaged = [arr.mean() for arr in np.array_split(graph_x, len(graph_x)//avg_over)]
        graph_y_averaged = [arr.mean() for arr in np.array_split(graph_y, len(graph_y)//avg_over)]

        plt.plot(graph_x_averaged, graph_y_averaged)
        plt.savefig(self.save_rewards_fig)
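
A minimal usage sketch for the agent above, assuming the module-level pieces it references (gym, torch, Device, ActorNet, CriticNet, Discriminator and the expert trajectory file) are available:

if __name__ == '__main__':
    agent = PGAgent()
    # Collect trajectories, update the discriminator, actor and critic, then save the actor
    agent.run(model_path=agent.save_policy_fpath, policy_iterations=65, renders=False)
    # Plot episode rewards averaged over a small window
    agent.plot_rewards(avg_over=agent.avg_over)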
Example #2
def adversarial_train(img_data_loader, vgg_cutoff_layer=36, num_epochs=2000, decay_factor=0.1, initial_lr=0.0001, adversarial_loss_weight=0.001, checkpoint=None, save=True):
    # If a checkpoint is given, initialise the generator from it; the discriminator
    # and both optimizers are created fresh either way
    if checkpoint is not None:
        imported_checkpoint = torch.load(checkpoint)
        generator = imported_checkpoint['generator']
    else:
        generator = Generator()
    starting_epoch = 0
    discriminator = Discriminator()
    generator_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, generator.parameters()), lr=initial_lr)
    discriminator_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, discriminator.parameters()), lr=initial_lr)
    
    vgg = ChoppedVGG19(vgg_cutoff_layer)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Push everything to gpu if it's available
    content_criterion = nn.MSELoss().to(device)
    adversarial_criterion = nn.BCEWithLogitsLoss().to(device)
    generator.to(device)
    discriminator.to(device)
    vgg.to(device)

    for epoch in range(starting_epoch, num_epochs):
        running_perceptual_loss = 0.0
        running_adversarial_loss = 0.0
        for ii, (hr_imgs, lr_imgs) in enumerate(tqdm(img_data_loader)):
            hr_imgs, lr_imgs = hr_imgs.to(device), lr_imgs.to(device)

            # Forward pass through the generator, then extract VGG feature maps
            sr_imgs = generator(lr_imgs)
            sr_vgg_feature_maps = vgg(sr_imgs)
            hr_vgg_feature_maps = vgg(hr_imgs).detach()

            # Try and discriminate fakes
            sr_discriminator_logprob = discriminator(sr_imgs)
            
            # Calculate loss for generator
            content_loss = content_criterion(sr_vgg_feature_maps, hr_vgg_feature_maps)
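            # The generator's adversarial term is scored against "real" targets (ones):
            # it is rewarded when the discriminator classifies its super-resolved images as real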
            adversarial_loss = adversarial_criterion(sr_discriminator_logprob, torch.ones_like(sr_discriminator_logprob))
            perceptual_loss = content_loss + adversarial_loss_weight*adversarial_loss
            running_perceptual_loss += perceptual_loss.item()
            del sr_vgg_feature_maps, hr_vgg_feature_maps, sr_discriminator_logprob

            # Backpropagate and update generator
            generator_optimizer.zero_grad()
            perceptual_loss.backward()
            generator_optimizer.step()

            # Now for the discriminator
            sr_discriminator_logprob = discriminator(sr_imgs.detach())
            hr_discriminator_logprob = discriminator(hr_imgs)
            adversarial_loss = adversarial_criterion(sr_discriminator_logprob, torch.zeros_like(sr_discriminator_logprob)) + adversarial_criterion(hr_discriminator_logprob, torch.ones_like(hr_discriminator_logprob))
            running_adversarial_loss += adversarial_loss.item()

            # Backpropagate and update discriminator
            discriminator_optimizer.zero_grad()
            adversarial_loss.backward()
            discriminator_optimizer.step()
            del lr_imgs, hr_imgs, sr_imgs, sr_discriminator_logprob, hr_discriminator_logprob
        print("Epoch number {}".format(epoch))
        print("Average Perceptual Loss: {}".format(running_perceptual_loss/len(img_data_loader)))
        print("Average Adversarial Loss: {}".format(running_adversarial_loss/len(img_data_loader)))

        if save:
            # Save a checkpoint each epoch so training can be resumed later
            torch.save({'epoch': epoch,
                        'generator': generator,
                        'generator_optimizer': generator_optimizer,
                        'discriminator': discriminator,
                        'discriminator_optimizer':discriminator_optimizer},
                        'adversarial_training_checkpoint_CelebA_HQ.pth.tar')
Example #3
def main():
    env = DialogEnvironment()
    experiment_name = args.logdir.split('/')[1] #model name

    torch.manual_seed(args.seed)

    #TODO
    actor = Actor(hidden_size=args.hidden_size,num_layers=args.num_layers,device='cuda',input_size=args.input_size,output_size=args.input_size)
    critic = Critic(hidden_size=args.hidden_size,num_layers=args.num_layers,input_size=args.input_size,seq_len=args.seq_len)
    discrim = Discriminator(hidden_size=args.hidden_size,num_layers=args.num_layers,input_size=args.input_size,seq_len=args.seq_len)
    
    actor.to(device), critic.to(device), discrim.to(device)
    
    actor_optim = optim.Adam(actor.parameters(), lr=args.learning_rate)
    critic_optim = optim.Adam(critic.parameters(), lr=args.learning_rate, 
                              weight_decay=args.l2_rate) 
    discrim_optim = optim.Adam(discrim.parameters(), lr=args.learning_rate)

    # load demonstrations

    writer = SummaryWriter(args.logdir)

    if args.load_model is not None: #TODO
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model', str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)

        actor.load_state_dict(ckpt['actor'])
        critic.load_state_dict(ckpt['critic'])
        discrim.load_state_dict(ckpt['discrim'])


    
    episodes = 0
    train_discrim_flag = True

    for iter in range(args.max_iter_num):
        actor.eval(), critic.eval()
        memory = deque()

        steps = 0
        scores = []
        similarity_scores = []
        while steps < args.total_sample_size: 
            state, expert_action, raw_state, raw_expert_action = env.reset()
            score = 0
            similarity_score = 0
            state = state[:args.seq_len,:]
            expert_action = expert_action[:args.seq_len,:]
            state = state.to(device)
            expert_action = expert_action.to(device)
            for _ in range(10000): 

                steps += 1

                mu, std = actor(state.reshape(1,args.seq_len,args.input_size))
                action = get_action(mu.cpu(), std.cpu())[0]
                for i in range(5):
                    emb_sum = expert_action[i,:].sum().cpu().item()
                    if emb_sum == 0:
                       # print(i)
                        action[i:,:] = 0 # manual padding
                        break

                done= env.step(action)
                irl_reward = get_reward(discrim, state, action, args)
                if done:
                    mask = 0
                else:
                    mask = 1


                memory.append([state, torch.from_numpy(action).to(device), irl_reward, mask,expert_action])
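                # Each stored transition is [state, action, irl_reward, mask, expert_action];
                # mask == 0 flags episode termination for the downstream train_actor_critic
                # update, so returns/advantages are not bootstrapped across episodes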
                score += irl_reward
                similarity_score += get_cosine_sim(expert=expert_action,action=action.squeeze(),seq_len=5)
                #print(get_cosine_sim(s1=expert_action,s2=action.squeeze(),seq_len=5),'sim')
                if done:
                    break

            episodes += 1
            scores.append(score)
            similarity_scores.append(similarity_score)

        score_avg = np.mean(scores)
        similarity_score_avg = np.mean(similarity_scores)
        print('{}:: {} episode score is {:.2f}'.format(iter, episodes, score_avg))
        print('{}:: {} episode similarity score is {:.2f}'.format(iter, episodes, similarity_score_avg))

        actor.train(), critic.train(), discrim.train()
        if train_discrim_flag:
            expert_acc, learner_acc = train_discrim(discrim, memory, discrim_optim, args) 
            print("Expert: %.2f%% | Learner: %.2f%%" % (expert_acc * 100, learner_acc * 100))
            writer.add_scalar('log/expert_acc', float(expert_acc), iter) #logg
            writer.add_scalar('log/learner_acc', float(learner_acc), iter) #logg
            writer.add_scalar('log/avg_acc', float(learner_acc + expert_acc)/2, iter) #logg
            if args.suspend_accu_exp is not None: #only if not None do we check.
                if expert_acc > args.suspend_accu_exp and learner_acc > args.suspend_accu_gen:
                    train_discrim_flag = False

        train_actor_critic(actor, critic, memory, actor_optim, critic_optim, args)
        writer.add_scalar('log/score', float(score_avg), iter)
        writer.add_scalar('log/similarity_score', float(similarity_score_avg), iter)
        writer.add_text('log/raw_state', raw_state[0],iter)
        raw_action = get_raw_action(action) #TODO
        writer.add_text('log/raw_action', raw_action,iter)
        writer.add_text('log/raw_expert_action', raw_expert_action,iter)

        if iter % 100 == 0:
            score_avg = int(score_avg)
            # Append this iteration's sample to the experiment's text log
            with open(experiment_name+'.txt', 'a') as file_object:
                result_str = str(iter) + '|' + raw_state[0] + '|' + raw_action + '|' + raw_expert_action + '\n'
                file_object.write(result_str)

            model_path = os.path.join(os.getcwd(),'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path, experiment_name + '_ckpt_'+ str(score_avg)+'.pth.tar')

            save_checkpoint({
                'actor': actor.state_dict(),
                'critic': critic.state_dict(),
                'discrim': discrim.state_dict(),
                'args': args,
                'score': score_avg,
            }, filename=ckpt_path)
Example #4
class Trainer():
    def __init__(self, config):
        self.batch_size = config.batchSize
        self.epochs = config.epochs

        self.use_cycle_loss = config.cycleLoss
        self.cycle_multiplier = config.cycleMultiplier

        self.use_identity_loss = config.identityLoss
        self.identity_multiplier = config.identityMultiplier

        self.load_models = config.loadModels
        self.data_x_loc = config.dataX
        self.data_y_loc = config.dataY

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.init_models()
        self.init_data_loaders()
        self.g_optimizer = torch.optim.Adam(list(self.G_X.parameters()) +
                                            list(self.G_Y.parameters()),
                                            lr=config.lr)
        self.d_optimizer = torch.optim.Adam(list(self.D_X.parameters()) +
                                            list(self.D_Y.parameters()),
                                            lr=config.lr)
        self.scheduler_g = torch.optim.lr_scheduler.StepLR(self.g_optimizer,
                                                           step_size=1,
                                                           gamma=0.95)

        self.output_path = "./outputs/"
        self.img_width = 256
        self.img_height = 256

    # Load/Construct the models
    def init_models(self):

        self.G_X = Generator(3, 3, nn.InstanceNorm2d)
        self.D_X = Discriminator(3)
        self.G_Y = Generator(3, 3, nn.InstanceNorm2d)
        self.D_Y = Discriminator(3)

        if self.load_models:
            self.G_X.load_state_dict(
                torch.load(self.output_path + "models/G_X",
                           map_location='cpu'))
            self.G_Y.load_state_dict(
                torch.load(self.output_path + "models/G_Y",
                           map_location='cpu'))
            self.D_X.load_state_dict(
                torch.load(self.output_path + "models/D_X",
                           map_location='cpu'))
            self.D_Y.load_state_dict(
                torch.load(self.output_path + "models/D_Y",
                           map_location='cpu'))
        else:
            self.G_X.apply(init_func)
            self.G_Y.apply(init_func)
            self.D_X.apply(init_func)
            self.D_Y.apply(init_func)

        self.G_X.to(self.device)
        self.G_Y.to(self.device)
        self.D_X.to(self.device)
        self.D_Y.to(self.device)

    # Initialize data loaders and image transformer
    def init_data_loaders(self):

        transform = transforms.Compose([
            transforms.Resize((self.img_width, self.img_height)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        X_folder = torchvision.datasets.ImageFolder(self.data_x_loc, transform)
        self.X_loader = torch.utils.data.DataLoader(X_folder,
                                                    batch_size=self.batch_size,
                                                    shuffle=True)

        Y_folder = torchvision.datasets.ImageFolder(self.data_y_loc, transform)
        self.Y_loader = torch.utils.data.DataLoader(Y_folder,
                                                    batch_size=self.batch_size,
                                                    shuffle=True)

    def save_models(self):
        torch.save(self.G_X.state_dict(), self.output_path + "models/G_X")
        torch.save(self.D_X.state_dict(), self.output_path + "models/D_X")
        torch.save(self.G_Y.state_dict(), self.output_path + "models/G_Y")
        torch.save(self.D_Y.state_dict(), self.output_path + "models/D_Y")

    # Reset gradients on both optimizers; needed before every backward/step
    def reset_gradients(self):
        self.g_optimizer.zero_grad()
        self.d_optimizer.zero_grad()

    # Every few epochs, sample images from the training data, translate them with the generators, and save the results for visual inspection
    def save_samples(self, epoch):
        x_iter = iter(self.X_loader)
        y_iter = iter(self.Y_loader)

        img_data_x, _ = next(x_iter)
        img_data_y, _ = next(y_iter)

        original_x = np.array(img_data_x[0])
        generated_y = np.array(
            self.G_Y(img_data_x[0].view(1, 3, self.img_width,
                                        self.img_height).to(
                                            self.device)).cpu().detach())[0]

        original_y = np.array(img_data_y[0])
        generated_x = np.array(
            self.G_X(img_data_y[0].view(1, 3, self.img_width,
                                        self.img_height).to(
                                            self.device)).cpu().detach())[0]

        def prepare_image(img):
            img = img.transpose((1, 2, 0))
            return img / 2 + 0.5

        original_x = prepare_image(original_x)
        generated_y = prepare_image(generated_y)

        original_y = prepare_image(original_y)
        generated_x = prepare_image(generated_x)

        plt.imsave('./outputs/samples/original_X_{}.png'.format(epoch),
                   original_x)
        plt.imsave('./outputs/samples/original_Y_{}.png'.format(epoch),
                   original_y)

        plt.imsave('./outputs/samples/generated_X_{}.png'.format(epoch),
                   generated_x)
        plt.imsave('./outputs/samples/generated_Y_{}.png'.format(epoch),
                   generated_y)

    # Training loop
    def train(self):
        D_X_losses = []
        D_Y_losses = []

        G_X_losses = []
        G_Y_losses = []

        for epoch in range(self.epochs):
            print("======")
            print("Epoch {}!".format(epoch + 1))

            # Track progress
            if epoch % 5 == 0:
                self.save_samples(epoch)

            # Paper reduces lr after 100 epochs
            if epoch > 100:
                self.scheduler_g.step()

            for (data_X, _), (data_Y, _) in zip(self.X_loader, self.Y_loader):
                data_X = data_X.to(self.device)
                data_Y = data_Y.to(self.device)

                # =====================================
                # Train Discriminators
                # =====================================
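                # Least-squares GAN objectives: the discriminators are pushed towards 1 on real
                # images and 0 on generated ones; the generators (further below) are pushed to
                # make the discriminators output 1 on their fakes.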

                # Train fake X
                self.reset_gradients()
                fake_X = self.G_X(data_Y)
                out_fake_X = self.D_X(fake_X)
                d_x_f_loss = torch.mean(out_fake_X**2)
                d_x_f_loss.backward()
                self.d_optimizer.step()

                # Train fake Y
                self.reset_gradients()
                fake_Y = self.G_Y(data_X)
                out_fake_Y = self.D_Y(fake_Y)
                d_y_f_loss = torch.mean(out_fake_Y**2)
                d_y_f_loss.backward()
                self.d_optimizer.step()

                # Train true X
                self.reset_gradients()
                out_true_X = self.D_X(data_X)
                d_x_t_loss = torch.mean((out_true_X - 1)**2)
                d_x_t_loss.backward()
                self.d_optimizer.step()

                # Train true Y
                self.reset_gradients()
                out_true_Y = self.D_Y(data_Y)
                d_y_t_loss = torch.mean((out_true_Y - 1)**2)
                d_y_t_loss.backward()
                self.d_optimizer.step()

                D_X_losses.append([
                    d_x_t_loss.cpu().detach().numpy(),
                    d_x_f_loss.cpu().detach().numpy()
                ])
                D_Y_losses.append([
                    d_y_t_loss.cpu().detach().numpy(),
                    d_y_f_loss.cpu().detach().numpy()
                ])

                # =====================================
                # Train GENERATORS
                # =====================================

                # Cycle X -> Y -> X
                self.reset_gradients()

                fake_Y = self.G_Y(data_X)
                out_fake_Y = self.D_Y(fake_Y)

                g_loss1 = torch.mean((out_fake_Y - 1)**2)
                if self.use_cycle_loss:
                    reconst_X = self.G_X(fake_Y)
                    g_loss2 = self.cycle_multiplier * torch.mean(
                        (data_X - reconst_X)**2)
                else:
                    # Without cycle loss the cycle term contributes nothing
                    g_loss2 = torch.zeros(1, device=self.device)

                G_Y_losses.append([
                    g_loss1.cpu().detach().numpy(),
                    g_loss2.cpu().detach().numpy()
                ])
                g_loss = g_loss1 + g_loss2
                g_loss.backward()
                self.g_optimizer.step()

                # Cycle Y -> X -> Y
                self.reset_gradients()

                fake_X = self.G_X(data_Y)
                out_fake_X = self.D_X(fake_X)

                g_loss1 = torch.mean((out_fake_X - 1)**2)
                if self.use_cycle_loss:
                    reconst_Y = self.G_Y(fake_X)
                    g_loss2 = self.cycle_multiplier * torch.mean(
                        (data_Y - reconst_Y)**2)
                else:
                    # Without cycle loss the cycle term contributes nothing
                    g_loss2 = torch.zeros(1, device=self.device)

                G_X_losses.append([
                    g_loss1.cpu().detach().numpy(),
                    g_loss2.cpu().detach().numpy()
                ])
                g_loss = g_loss1 + g_loss2
                g_loss.backward()
                self.g_optimizer.step()

                # =====================================
                # Train image IDENTITY
                # =====================================

                if self.use_identity_loss:
                    self.reset_gradients()

                    # X should be same after G(X)
                    same_X = self.G_X(data_X)
                    g_loss = self.identity_multiplier * torch.mean(
                        (data_X - same_X)**2)
                    g_loss.backward()
                    self.g_optimizer.step()

                    # Y should be same after G(Y)
                    same_Y = self.G_Y(data_Y)
                    g_loss = self.identity_multiplier * torch.mean(
                        (data_Y - same_Y)**2)
                    g_loss.backward()
                    self.g_optimizer.step()

            # Epoch done, save models
            self.save_models()

        # Save losses for analysis
        np.save(self.output_path + 'losses/G_X_losses.npy',
                np.array(G_X_losses))
        np.save(self.output_path + 'losses/G_Y_losses.npy',
                np.array(G_Y_losses))
        np.save(self.output_path + 'losses/D_X_losses.npy',
                np.array(D_X_losses))
        np.save(self.output_path + 'losses/D_Y_losses.npy',
                np.array(D_Y_losses))
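
A minimal usage sketch for the Trainer above. The config attribute names are taken from __init__; argparse.Namespace, the hyperparameter values, the data paths and the ./outputs/ directory layout are placeholder assumptions.

if __name__ == '__main__':
    from argparse import Namespace

    config = Namespace(batchSize=1, epochs=200,
                       cycleLoss=True, cycleMultiplier=10.0,
                       identityLoss=False, identityMultiplier=5.0,
                       loadModels=False,
                       dataX='./data/X', dataY='./data/Y',
                       lr=0.0002)
    trainer = Trainer(config)
    trainer.train()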
Example #5
    def train_ei_adv(self,
                     dataloader,
                     physics,
                     transform,
                     epochs,
                     lr,
                     alpha,
                     ckp_interval,
                     schedule,
                     residual=True,
                     pretrained=None,
                     task='',
                     loss_type='l2',
                     cat=True,
                     report_psnr=False,
                     lr_cos=False):
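        # lr is expected to be a dict with keys 'G', 'D' and 'WD'
        # (generator lr, discriminator lr and weight decay), as used below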
        save_path = './ckp/{}_ei_adv_{}'.format(get_timestamp(), task)

        os.makedirs(save_path, exist_ok=True)

        generator = UNet(in_channels=self.in_channels,
                         out_channels=self.out_channels,
                         compact=4,
                         residual=residual,
                         circular_padding=True,
                         cat=cat)

        if pretrained:
            checkpoint = torch.load(pretrained)
            generator.load_state_dict(checkpoint['state_dict'])

        discriminator = Discriminator(
            (self.in_channels, self.img_width, self.img_height))

        generator = generator.to(self.device)
        discriminator = discriminator.to(self.device)

        if loss_type == 'l2':
            criterion_mc = torch.nn.MSELoss().to(self.device)
            criterion_ei = torch.nn.MSELoss().to(self.device)
        elif loss_type == 'l1':
            criterion_mc = torch.nn.L1Loss().to(self.device)
            criterion_ei = torch.nn.L1Loss().to(self.device)

        criterion_gan = torch.nn.MSELoss().to(self.device)

        optimizer_G = Adam(generator.parameters(),
                           lr=lr['G'],
                           weight_decay=lr['WD'])
        optimizer_D = Adam(discriminator.parameters(),
                           lr=lr['D'],
                           weight_decay=0)

        if report_psnr:
            log = LOG(save_path,
                      filename='training_loss',
                      field_name=[
                          'epoch', 'loss_mc', 'loss_ei', 'loss_g', 'loss_G',
                          'loss_D', 'psnr', 'mse'
                      ])
        else:
            log = LOG(save_path,
                      filename='training_loss',
                      field_name=[
                          'epoch', 'loss_mc', 'loss_ei', 'loss_g', 'loss_G',
                          'loss_D'
                      ])

        for epoch in range(epochs):
            adjust_learning_rate(optimizer_G, epoch, lr['G'], lr_cos, epochs,
                                 schedule)
            adjust_learning_rate(optimizer_D, epoch, lr['D'], lr_cos, epochs,
                                 schedule)

            loss = closure_ei_adv(generator, discriminator, dataloader,
                                  physics, transform, optimizer_G, optimizer_D,
                                  criterion_mc, criterion_ei, criterion_gan,
                                  alpha, self.dtype, self.device, report_psnr)

            log.record(epoch + 1, *loss)

            if report_psnr:
                print(
                    '{}\tEpoch[{}/{}]\tfc={:.4e}\tti={:.4e}\tg={:.4e}\tG={:.4e}\tD={:.4e}\tpsnr={:.4f}\tmse={:.4e}'
                    .format(get_timestamp(), epoch, epochs, *loss))
            else:
                print(
                    '{}\tEpoch[{}/{}]\tfc={:.4e}\tti={:.4e}\tg={:.4e}\tG={:.4e}\tD={:.4e}'
                    .format(get_timestamp(), epoch, epochs, *loss))

            if epoch % ckp_interval == 0 or epoch + 1 == epochs:
                state = {
                    'epoch': epoch,
                    'state_dict_G': generator.state_dict(),
                    'state_dict_D': discriminator.state_dict(),
                    'optimizer_G': optimizer_G.state_dict(),
                    'optimizer_D': optimizer_D.state_dict()
                }
                torch.save(
                    state,
                    os.path.join(save_path, 'ckp_{}.pth.tar'.format(epoch)))
        log.close()
Example #6
def train(args):

    # check if results path exists, if not create the folder
    check_folder(args.results_path)

    # generator model
    generator = HourglassNet(high_res=args.high_resolution)
    generator.to(device)

    # discriminator model
    discriminator = Discriminator(input_nc=1)
    discriminator.to(device)

    # optimizer
    optimizer_g = torch.optim.Adam(generator.parameters())
    optimizer_d = torch.optim.Adam(discriminator.parameters())

    # training parameters
    feature_weight = 0.5
    skip_count = 0
    use_gan = args.use_gan
    print_frequency = 5

    # dataloader
    illum_dataset = IlluminationDataset()
    illum_dataloader = DataLoader(illum_dataset, batch_size=args.batch_size)

    # gan loss based on lsgan that uses squared error
    gan_loss = GANLoss(gan_mode='lsgan')

    # training
    for epoch in range(1, args.epochs + 1):

        for data_idx, data in enumerate(illum_dataloader):
            source_img, source_light, target_img, target_light = data

            # .to() is not in-place for tensors, so the results must be reassigned
            source_img = source_img.to(device)
            source_light = source_light.to(device)
            target_img = target_img.to(device)
            target_light = target_light.to(device)

            optimizer_g.zero_grad()

            # Keep all skip connections if requested; otherwise drop a number of them
            # according to the low-res/high-res training scheme
            if args.use_skip:
                skip_count = 0
            else:
                skip_count = 5 if args.high_resolution else 4

            output = generator(source_img, target_light, skip_count,
                               target_img)

            source_face_feats, source_light_pred, target_face_feats, source_relit_pred = output

            img_loss = image_and_light_loss(source_relit_pred, target_img,
                                            source_light_pred, target_light)
            feat_loss = feature_loss(source_face_feats, target_face_feats)

            # if gan loss is used
            if use_gan:
                g_loss = gan_loss(discriminator(source_relit_pred),
                                  target_is_real=True)
            else:
                g_loss = torch.Tensor([0])

            total_g_loss = img_loss + g_loss + (feature_weight * feat_loss)
            total_g_loss.backward()
            optimizer_g.step()

            # training the discriminator
            if use_gan:
                optimizer_d.zero_grad()
                pred_real = discriminator(target_img)
                pred_fake = discriminator(source_relit_pred.detach())

                loss_real = gan_loss(pred_real, target_is_real=True)
                loss_fake = gan_loss(pred_fake, target_is_real=False)

                d_loss = (loss_real + loss_fake) * 0.5
                d_loss.backward()
                optimizer_d.step()
            else:
                loss_real = torch.Tensor([0])
                loss_fake = torch.Tensor([0])

            if data_idx % print_frequency == 0:
                print(
                    "Epoch: [{}]/[{}], Iteration: [{}]/[{}], image loss: {}, feature loss: {}, gen fake loss: {}, dis real loss: {}, dis fake loss: {}"
                    .format(epoch, args.epochs, data_idx + 1,
                            len(illum_dataloader), img_loss.item(),
                            feat_loss.item(), g_loss.item(), loss_real.item(),
                            loss_fake.item()))

        # saving model
        checkpoint_path = os.path.join(args.results_path,
                                       'checkpoint_epoch_{}.pth'.format(epoch))
        checkpoint = {
            'generator': generator.state_dict(),
            'discriminator': discriminator.state_dict(),
            'optimizer_g': optimizer_g.state_dict(),
            'optimizer_d': optimizer_d.state_dict()
        }
        torch.save(checkpoint, checkpoint_path)
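
A hedged sketch of how train() above might be invoked, assuming the module-level pieces it references (device, HourglassNet, Discriminator, IlluminationDataset, GANLoss, check_folder) are available; the attribute names mirror the args fields the function reads, and the values are placeholders.

if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(results_path='./results',
                     high_resolution=False,
                     batch_size=8,
                     use_gan=True,
                     use_skip=True,
                     epochs=10)
    train(args)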