Code Example #1
def GetResnet101Features():

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_folder = 'C:/Users/paoca/Documents/UVA PHD/NLP/PROJECT/UnnecesaryDataFolder'  # folder with data files saved by create_input_files.py
    data_name = 'coco_5_cap_per_img_5_min_word_freq'

    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'TRAIN',
        transform=transforms.Compose([normalize])),
                                               batch_size=5,
                                               shuffle=False,
                                               pin_memory=True)

    with torch.no_grad():
        encoder = Encoder()
        encoder.fine_tune(False)

        emb_dim = 512
        decoder_dim = 512
        encoderVae_encoder = EncodeVAE_Encoder(embed_dim=emb_dim,
                                               decoder_dim=decoder_dim,
                                               vocab_size=len(word_map))
        encoderVae_encoder.fine_tune(False)

        encoder.eval()
        encoderVae_encoder.eval()

        encoder = encoder.to(device)
        encoderVae_encoder = encoderVae_encoder.to(device)

        for i, (imgs, caps, caplens) in enumerate(train_loader):
            if i % 100 == 0:
                print(i)

            imgs = imgs.to(device)
            caps = caps.to(device)
            caplens = caplens.to(device)

            res = encoder(imgs)
            h = encoderVae_encoder(imgs, caps, caplens)

            # Save this batch's encoder features and VAE encoder output,
            # closing the files explicitly instead of leaking the open() handles.
            with open(
                    "C:/Users/paoca/Documents/UVA PHD/NLP/PROJECT/UnnecesaryDataFolder/TrainResnet101Features/"
                    + str(i) + ".p", "wb") as f:
                pickle.dump(res[0].cpu().numpy(), f)
            with open(
                    "C:/Users/paoca/Documents/UVA PHD/NLP/PROJECT/UnnecesaryDataFolder/TrainResnet101Features/VAE_"
                    + str(i) + ".p", "wb") as f:
                pickle.dump(h[0].cpu().numpy(), f)
Code Example #2
def validate_models(channels):
    """
    Validate trained models
    :param channels: List of compressed channels used
    :return: None
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    test_loader = test_dataloader()
    test_batch = next(iter(test_loader)).to(device)

    reconstructed_images = {}

    for channel in channels:
        NUM_CHANNELS = channel
        encoder = Encoder(NUM_CHANNELS).to(device)
        generator = Generator(NUM_CHANNELS).to(device)

        encoder.load_state_dict(
            torch.load(f"../models/encoder_{NUM_CHANNELS}.model", map_location=torch.device("cpu"))
        )
        generator.load_state_dict(
            torch.load(f"../models/generator_{NUM_CHANNELS}.model", map_location=torch.device("cpu"))
        )

        encoder.eval()
        generator.eval()

        reconstructed_image = generator(encoder(test_batch))
        reconstructed_images[NUM_CHANNELS] = reconstructed_image

    plot_image_grid(test_batch, reconstructed_images, NUM_IMAGES_GRID)
    save_images(test_batch, reconstructed_images)
    calculate_metric(channels)
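As a usage note, validate_models expects the list of compressed-channel counts used at training time, matching the encoder_<C>.model / generator_<C>.model checkpoints under ../models/; a minimal invocation sketch (the channel counts here are assumptions):

if __name__ == "__main__":
    # Illustrative channel counts; they must match the trained checkpoints on disk.
    validate_models([2, 4, 8])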
Code Example #3
def main(imgurl):
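    # NOTE: attention_dim, emb_dim, decoder_dim, dropout, encoder_lr, decoder_lr,
    # fine_tune_encoder and device are used below but not defined in this excerpt;
    # they are assumed to be module-level globals configured elsewhere in the script.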
    # Load word map (word2ix)
    with open('input_files/WORDMAP.json', 'r') as j:
        word_map = json.load(j)
    rev_word_map = {v: k for k, v in word_map.items()}  # ix2word

    # Load model
    decoder = DecoderWithAttention(attention_dim=attention_dim,
                                   embed_dim=emb_dim,
                                   decoder_dim=decoder_dim,
                                   vocab_size=len(word_map),
                                   dropout=dropout)
    decoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, decoder.parameters()),
                                         lr=decoder_lr)
    encoder = Encoder()
    encoder.fine_tune(fine_tune_encoder)
    encoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, encoder.parameters()),
        lr=encoder_lr) if fine_tune_encoder else None

    decoder.load_state_dict(
        torch.load('output_files/BEST_checkpoint_decoder.pth.tar'))
    encoder.load_state_dict(
        torch.load('output_files/BEST_checkpoint_encoder.pth.tar'))

    decoder = decoder.to(device)
    decoder.eval()
    encoder = encoder.to(device)
    encoder.eval()

    # Encode, decode with attention and beam search
    seq, alphas = caption_image_beam_search(encoder,
                                            decoder,
                                            imgurl,
                                            word_map,
                                            beam_size=5)
    alphas = torch.FloatTensor(alphas)

    # Visualize caption and attention of best sequence
    # visualize_att(img, seq, alphas, rev_word_map, args.smooth)

    words = [rev_word_map[ind] for ind in seq]
    caption = ' '.join(words[1:-1])
    visualize_att(imgurl, seq, alphas, rev_word_map)
Code Example #4
def encoder_test(seq_len=4, decoder_batch_size=2, model_name='xception'):
    encoder = Encoder(seq_len=seq_len, decoder_batch_size=decoder_batch_size, model_name=model_name)
    encoder.cuda()
    encoder.eval()

    with torch.no_grad():
        images = []
        for i in range(decoder_batch_size):
            images.append(torch.rand((seq_len, 3, 299, 299)))
        images = torch.stack(images).cuda()
        features = encoder.forward(images)

        split_features = []
        for i in range(decoder_batch_size):
            split_features.append(encoder._forward_old(images[i]))
        split_features = torch.stack(split_features)

    assert torch.equal(split_features, features), 'batched and per-sequence features differ'
    print('encoder test passed!')
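If the batched and per-sequence paths are not expected to be bit-identical (for example when different cuDNN kernels are selected), a tolerance-based comparison can replace the strict equality assert above; a small sketch, with tolerances chosen as assumptions:

import torch

def features_match(a, b, rtol=1e-4, atol=1e-5):
    # Relaxed alternative to the exact-equality check in encoder_test().
    return torch.allclose(a, b, rtol=rtol, atol=atol)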
Code Example #5
    def initEncoderDecoder(self):

        if self.opt.dataset == 'SMPL':
            num_verts = 6890
        elif self.opt.dataset == 'all_animals':
            num_verts = 3889
        else:
            # Avoid a NameError further down when an unknown dataset is configured
            raise ValueError('Unsupported dataset: {}'.format(self.opt.dataset))

        encoder = Encoder()
        decoder = Decoder(num_verts=num_verts)
        encoder.load_state_dict(torch.load(self.encoder_weights))
        decoder.load_state_dict(torch.load(self.decoder_weights))
        self.encoder = encoder.eval()
        self.decoder = decoder.eval()
Code Example #6
File: trainer.py  Project: cshang2017/Planpix
class Trainer():
    def __init__(self, params, experience_replay_buffer,metrics,results_dir,env):
        self.parms = params     
        self.D = experience_replay_buffer  
        self.metrics = metrics
        self.env = env
        self.tested_episodes = 0

        self.statistics_path = results_dir+'/statistics' 
        self.model_path = results_dir+'/model' 
        self.video_path = results_dir+'/video' 
        self.rew_vs_pred_rew_path = results_dir+'/rew_vs_pred_rew'
        self.dump_plan_path = results_dir+'/dump_plan'
        
        # if the folders do not exist, create them
        os.makedirs(self.statistics_path, exist_ok=True) 
        os.makedirs(self.model_path, exist_ok=True) 
        os.makedirs(self.video_path, exist_ok=True) 
        os.makedirs(self.rew_vs_pred_rew_path, exist_ok=True) 
        os.makedirs(self.dump_plan_path, exist_ok=True) 
        

        # Create models
        self.transition_model = TransitionModel(self.parms.belief_size, self.parms.state_size, self.env.action_size, self.parms.hidden_size, self.parms.embedding_size, self.parms.activation_function).to(device=self.parms.device)
        self.observation_model = ObservationModel(self.parms.belief_size, self.parms.state_size, self.parms.embedding_size, self.parms.activation_function).to(device=self.parms.device)
        self.reward_model = RewardModel(self.parms.belief_size, self.parms.state_size, self.parms.hidden_size, self.parms.activation_function).to(device=self.parms.device)
        self.encoder = Encoder(self.parms.embedding_size,self.parms.activation_function).to(device=self.parms.device)
        self.param_list = list(self.transition_model.parameters()) + list(self.observation_model.parameters()) + list(self.reward_model.parameters()) + list(self.encoder.parameters()) 
        self.optimiser = optim.Adam(self.param_list, lr=0 if self.parms.learning_rate_schedule != 0 else self.parms.learning_rate, eps=self.parms.adam_epsilon)
        self.planner = MPCPlanner(self.env.action_size, self.parms.planning_horizon, self.parms.optimisation_iters, self.parms.candidates, self.parms.top_candidates, self.transition_model, self.reward_model,self.env.action_range[0], self.env.action_range[1])

        global_prior = Normal(torch.zeros(self.parms.batch_size, self.parms.state_size, device=self.parms.device), torch.ones(self.parms.batch_size, self.parms.state_size, device=self.parms.device))  # Global prior N(0, I)
        self.free_nats = torch.full((1, ), self.parms.free_nats, dtype=torch.float32, device=self.parms.device)  # Allowed deviation in KL divergence

    def load_checkpoints(self):
        self.metrics = torch.load(self.model_path+'/metrics.pth')
        model_path = self.model_path+'/best_model'
        os.makedirs(model_path, exist_ok=True) 
        files = os.listdir(model_path)
        if files:
            checkpoint = [f for f in files if os.path.isfile(os.path.join(model_path, f))]
            model_dicts = torch.load(os.path.join(model_path, checkpoint[0]),map_location=self.parms.device)
            self.transition_model.load_state_dict(model_dicts['transition_model'])
            self.observation_model.load_state_dict(model_dicts['observation_model'])
            self.reward_model.load_state_dict(model_dicts['reward_model'])
            self.encoder.load_state_dict(model_dicts['encoder'])
            self.optimiser.load_state_dict(model_dicts['optimiser'])  
            print("Loading models checkpoints!")
        else:
            print("Checkpoints not found!")


    def update_belief_and_act(self, env, belief, posterior_state, action, observation, reward, min_action=-inf, max_action=inf,explore=False):
        # Infer belief over current state q(s_t|o≤t,a<t) from the history
        encoded_obs = self.encoder(observation).unsqueeze(dim=0).to(device=self.parms.device)       
        belief, _, _, _, posterior_state, _, _ = self.transition_model(posterior_state, action.unsqueeze(dim=0), belief, encoded_obs)  # Action and observation need extra time dimension
        belief, posterior_state = belief.squeeze(dim=0), posterior_state.squeeze(dim=0)  # Remove time dimension from belief/state
        action,pred_next_rew,_,_,_ = self.planner(belief, posterior_state,explore)  # Get action from planner(q(s_t|o≤t,a<t), p)      
        
        if explore:
            action = action + self.parms.action_noise * torch.randn_like(action)  # Add exploration noise ε ~ p(ε) to the action
        action.clamp_(min=min_action, max=max_action)  # Clip action range
        next_observation, reward, done = env.step(action.cpu() if isinstance(env, EnvBatcher) else action[0].cpu())  # If a single env is instantiated perform a single action (take the item from the list), else perform all actions
        
        return belief, posterior_state, action, next_observation, reward, done,pred_next_rew 
    
    def fit_buffer(self,episode):
        # Fit the models on data sampled from the replay buffer

        # Model fitting
        losses = []
        tqdm.write("Fitting buffer")
        for s in tqdm(range(self.parms.collect_interval)):

            # Draw sequence chunks {(o_t, a_t, r_t+1, terminal_t+1)} ~ D uniformly at random from the dataset (including terminal flags)
            observations, actions, rewards, nonterminals = self.D.sample(self.parms.batch_size, self.parms.chunk_size)  # Transitions start at time t = 0
            # Create initial belief and state for time t = 0
            init_belief, init_state = torch.zeros(self.parms.batch_size, self.parms.belief_size, device=self.parms.device), torch.zeros(self.parms.batch_size, self.parms.state_size, device=self.parms.device)
            encoded_obs = bottle(self.encoder, (observations[1:], ))

            # Update belief/state using posterior from previous belief/state, previous action and current observation (over entire sequence at once)
            beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs = self.transition_model(init_state, actions[:-1], init_belief, encoded_obs, nonterminals[:-1])
            
            # Calculate observation likelihood, reward likelihood and KL losses (for t = 0 only for latent overshooting); sum over final dims, average over batch and time (original implementation, though paper seems to miss 1/T scaling?)
            # LOSS
            observation_loss = F.mse_loss(bottle(self.observation_model, (beliefs, posterior_states)), observations[1:], reduction='none').sum((2, 3, 4)).mean(dim=(0, 1))
            kl_loss = torch.max(kl_divergence(Normal(posterior_means, posterior_std_devs), Normal(prior_means, prior_std_devs)).sum(dim=2), self.free_nats).mean(dim=(0, 1))  
            reward_loss = F.mse_loss(bottle(self.reward_model, (beliefs, posterior_states)), rewards[:-1], reduction='none').mean(dim=(0, 1))            

            # Update model parameters
            self.optimiser.zero_grad()

            (observation_loss + reward_loss + kl_loss).backward() # BACKPROPAGATION
            nn.utils.clip_grad_norm_(self.param_list, self.parms.grad_clip_norm, norm_type=2)
            self.optimiser.step()
            # Store (0) observation loss (1) reward loss (2) KL loss
            losses.append([observation_loss.item(), reward_loss.item(), kl_loss.item()])#, regularizer_loss.item()])

        #save statistics and plot them
        losses = tuple(zip(*losses))  
        self.metrics['observation_loss'].append(losses[0])
        self.metrics['reward_loss'].append(losses[1])
        self.metrics['kl_loss'].append(losses[2])
      
        lineplot(self.metrics['episodes'][-len(self.metrics['observation_loss']):], self.metrics['observation_loss'], 'observation_loss', self.statistics_path)
        lineplot(self.metrics['episodes'][-len(self.metrics['reward_loss']):], self.metrics['reward_loss'], 'reward_loss', self.statistics_path)
        lineplot(self.metrics['episodes'][-len(self.metrics['kl_loss']):], self.metrics['kl_loss'], 'kl_loss', self.statistics_path)
        
    def explore_and_collect(self,episode):
        tqdm.write("Collect new data:")
        reward = 0
        # Data collection
        with torch.no_grad():
            done = False
            observation, total_reward = self.env.reset(), 0
            belief, posterior_state, action = torch.zeros(1, self.parms.belief_size, device=self.parms.device), torch.zeros(1, self.parms.state_size, device=self.parms.device), torch.zeros(1, self.env.action_size, device=self.parms.device)
            t = 0
            real_rew = []
            predicted_rew = [] 
            total_steps = self.parms.max_episode_length // self.env.action_repeat
            explore = True

            for t in tqdm(range(total_steps)):
                # Here we need to explore
                belief, posterior_state, action, next_observation, reward, done, pred_next_rew = self.update_belief_and_act(self.env, belief, posterior_state, action, observation.to(device=self.parms.device), [reward], self.env.action_range[0], self.env.action_range[1], explore=explore)
                self.D.append(observation, action.cpu(), reward, done)
                real_rew.append(reward)
                predicted_rew.append(pred_next_rew.to(device=self.parms.device).item())
                total_reward += reward
                observation = next_observation
                if self.parms.flag_render:
                    env.render()
                if done:
                    break

        # Update and plot train reward metrics
        self.metrics['steps'].append( (t * self.env.action_repeat) + self.metrics['steps'][-1])
        self.metrics['episodes'].append(episode)
        self.metrics['train_rewards'].append(total_reward)
        self.metrics['predicted_rewards'].append(np.array(predicted_rew).sum())

        lineplot(self.metrics['episodes'][-len(self.metrics['train_rewards']):], self.metrics['train_rewards'], 'train_rewards', self.statistics_path)
        double_lineplot(self.metrics['episodes'], self.metrics['train_rewards'], self.metrics['predicted_rewards'], "train_r_vs_pr", self.statistics_path)

    def train_models(self):
        # from (init_episodes) to (training_episodes + init_episodes)
        tqdm.write("Start training.")

        for episode in tqdm(range(self.parms.num_init_episodes +1, self.parms.training_episodes) ):
            self.fit_buffer(episode)       
            self.explore_and_collect(episode)
            if episode % self.parms.test_interval == 0:
                self.test_model(episode)
                torch.save(self.metrics, os.path.join(self.model_path, 'metrics.pth'))
                torch.save({'transition_model': self.transition_model.state_dict(), 'observation_model': self.observation_model.state_dict(), 'reward_model': self.reward_model.state_dict(), 'encoder': self.encoder.state_dict(), 'optimiser': self.optimiser.state_dict()},  os.path.join(self.model_path, 'models_%d.pth' % episode))
            
            if episode % self.parms.storing_dataset_interval == 0:
                self.D.store_dataset(self.parms.dataset_path+'dump_dataset')

        return self.metrics

    def test_model(self, episode=None): #no explore here
        if episode is None:
            episode = self.tested_episodes


        # Set models to eval mode
        self.transition_model.eval()
        self.observation_model.eval()
        self.reward_model.eval()
        self.encoder.eval()
        
        # Initialise parallelised test environments
        test_envs = EnvBatcher(ControlSuiteEnv, (self.parms.env_name, self.parms.seed, self.parms.max_episode_length, self.parms.bit_depth), {}, self.parms.test_episodes)
        total_steps = self.parms.max_episode_length // test_envs.action_repeat
        rewards = np.zeros(self.parms.test_episodes)
        
        real_rew = torch.zeros([total_steps,self.parms.test_episodes])
        predicted_rew = torch.zeros([total_steps,self.parms.test_episodes])

        with torch.no_grad():
            observation, total_rewards, video_frames = test_envs.reset(), np.zeros((self.parms.test_episodes, )), []            
            belief, posterior_state, action = torch.zeros(self.parms.test_episodes, self.parms.belief_size, device=self.parms.device), torch.zeros(self.parms.test_episodes, self.parms.state_size, device=self.parms.device), torch.zeros(self.parms.test_episodes, self.env.action_size, device=self.parms.device)
            tqdm.write("Testing model.")
            for t in range(total_steps):     
                belief, posterior_state, action, next_observation, rewards, done, pred_next_rew  = self.update_belief_and_act(test_envs,  belief, posterior_state, action, observation.to(device=self.parms.device), list(rewards), self.env.action_range[0], self.env.action_range[1])
                total_rewards += rewards.numpy()
                real_rew[t] = rewards
                predicted_rew[t]  = pred_next_rew

                observation = self.env.get_original_frame().unsqueeze(dim=0)

                video_frames.append(make_grid(torch.cat([observation, self.observation_model(belief, posterior_state).cpu()], dim=3) + 0.5, nrow=5).numpy())  # Decentre
                observation = next_observation
                if done.sum().item() == self.parms.test_episodes:
                    break
            
        real_rew = torch.transpose(real_rew, 0, 1)
        predicted_rew = torch.transpose(predicted_rew, 0, 1)
        
        #save and plot metrics 
        self.tested_episodes += 1
        self.metrics['test_episodes'].append(episode)
        self.metrics['test_rewards'].append(total_rewards.tolist())

        lineplot(self.metrics['test_episodes'], self.metrics['test_rewards'], 'test_rewards', self.statistics_path)
        
        write_video(video_frames, 'test_episode_%s' % str(episode), self.video_path)  # Lossy compression
        # Set models to train mode
        self.transition_model.train()
        self.observation_model.train()
        self.reward_model.train()
        self.encoder.train()
        # Close test environments
        test_envs.close()
        return self.metrics


    def dump_plan_video(self, step_before_plan=120): 
        # number of steps to run before starting to collect frames to dump
        step_before_plan = min(step_before_plan, (self.parms.max_episode_length // self.env.action_repeat))
        
        # Set models to eval mode
        self.transition_model.eval()
        self.observation_model.eval()
        self.reward_model.eval()
        self.encoder.eval()
        video_frames = []
        reward = 0

        with torch.no_grad():
            observation = self.env.reset()
            belief, posterior_state, action = torch.zeros(1, self.parms.belief_size, device=self.parms.device), torch.zeros(1, self.parms.state_size, device=self.parms.device), torch.zeros(1, self.env.action_size, device=self.parms.device)
            tqdm.write("Executing episode.")
            for t in range(step_before_plan):
                belief, posterior_state, action, next_observation, reward, done, _ = self.update_belief_and_act(self.env,  belief, posterior_state, action, observation.to(device=self.parms.device), [reward], self.env.action_range[0], self.env.action_range[1])
                observation = next_observation
                video_frames.append(make_grid(torch.cat([observation.cpu(), self.observation_model(belief, posterior_state).to(device=self.parms.device).cpu()], dim=3) + 0.5, nrow=5).numpy())  # Decentre
                if done:
                    break
            self.create_and_dump_plan(self.env,  belief, posterior_state, action, observation.to(device=self.parms.device), [reward], self.env.action_range[0], self.env.action_range[1])
            
            
        # Set models to train mode
        self.transition_model.train()
        self.observation_model.train()
        self.reward_model.train()
        self.encoder.train()
        # Close test environments
        self.env.close()

    def create_and_dump_plan(self, env, belief, posterior_state, action, observation, reward, min_action=-inf, max_action=inf): 

        tqdm.write("Dumping plan")
        video_frames = []

        encoded_obs = self.encoder(observation).unsqueeze(dim=0)
        belief, _, _, _, posterior_state, _, _ = self.transition_model(posterior_state, action.unsqueeze(dim=0), belief, encoded_obs)  
        belief, posterior_state = belief.squeeze(dim=0), posterior_state.squeeze(dim=0)  # Remove time dimension from belief/state
        next_action,_, beliefs, states, plan = self.planner(belief, posterior_state,False)  # Get action from planner(q(s_t|o≤t,a<t), p)      
        predicted_frames = self.observation_model(beliefs, states).to(device=self.parms.device)

        for i in range(self.parms.planning_horizon):
            plan[i].clamp_(min=env.action_range[0], max=self.env.action_range[1])  # Clip action range
            next_observation, reward, done = env.step(plan[i].cpu())  
            next_observation = next_observation.squeeze(dim=0)
            video_frames.append(make_grid(torch.cat([next_observation, predicted_frames[i]], dim=1) + 0.5, nrow=2).numpy())  # Decentre

        write_video(video_frames, 'dump_plan', self.dump_plan_path, dump_frame=True)  
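A note on the bottle helper used in fit_buffer above: it is not shown in this excerpt, but in PlaNet-style code it typically flattens the leading (time, batch) dimensions before applying a model and restores them afterwards. A minimal sketch under that assumption:

def bottle(fn, tensors):
    # Flatten the leading (time, batch) dims, apply fn, then restore them on the output.
    sizes = [t.size() for t in tensors]
    out = fn(*[t.reshape(s[0] * s[1], *s[2:]) for t, s in zip(tensors, sizes)])
    return out.reshape(sizes[0][0], sizes[0][1], *out.size()[1:])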
    
            
Code Example #7
class SACAgent():
    def __init__(self, action_size, state_size, config):
        self.seed = config["seed"]
        torch.manual_seed(self.seed)
        np.random.seed(seed=self.seed)
        self.env = gym.make(config["env_name"])
        self.env = FrameStack(self.env, config)
        self.env.seed(self.seed)
        self.action_size = action_size
        self.state_size = state_size
        self.tau = config["tau"]
        self.gamma = config["gamma"]
        self.batch_size = config["batch_size"]
        self.lr = config["lr"]
        self.history_length = config["history_length"]
        self.size = config["size"]
        if not torch.cuda.is_available():
            config["device"] == "cpu"
        self.device = config["device"]
        self.eval = config["eval"]
        self.vid_path = config["vid_path"]
        print("actions size ", action_size)
        self.critic = QNetwork(state_size, action_size, config["fc1_units"],
                               config["fc2_units"]).to(self.device)
        self.q_optim = torch.optim.Adam(self.critic.parameters(),
                                        config["lr_critic"])
        self.target_critic = QNetwork(state_size, action_size,
                                      config["fc1_units"],
                                      config["fc2_units"]).to(self.device)
        self.target_critic.load_state_dict(self.critic.state_dict())
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha = self.log_alpha.exp()
        self.alpha_optim = Adam([self.log_alpha], lr=config["lr_alpha"])
        self.policy = SACActor(state_size, action_size).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(),
                                 lr=config["lr_policy"])
        self.encoder = Encoder(config).to(self.device)
        self.encoder_optimizer = torch.optim.Adam(self.encoder.parameters(),
                                                  self.lr)
        self.episodes = config["episodes"]
        self.memory = ReplayBuffer((self.history_length, self.size, self.size),
                                   (1, ), config["buffer_size"],
                                   config["image_pad"], self.seed, self.device)
        pathname = config["seed"]
        tensorboard_name = str(config["res_path"]) + '/runs/' + str(pathname)
        self.writer = SummaryWriter(tensorboard_name)
        self.steps = 0
        self.target_entropy = -torch.prod(
            torch.Tensor(action_size).to(self.device)).item()

    def act(self, state, evaluate=False):
        with torch.no_grad():
            state = torch.FloatTensor(state).to(self.device).unsqueeze(0)
            state = state.type(torch.float32).div_(255)
            self.encoder.eval()
            state = self.encoder.create_vector(state)
            self.encoder.train()
            if evaluate is False:
                action = self.policy.sample(state)
            else:
                action_prob, _ = self.policy(state)
                action = torch.argmax(action_prob)
                action = action.cpu().numpy()
                return action
            # action = np.clip(action, self.min_action, self.max_action)
            action = action.cpu().numpy()[0]
        return action

    def train_agent(self):
        average_reward = 0
        scores_window = deque(maxlen=100)
        t0 = time.time()
        for i_epiosde in range(1, self.episodes):
            episode_reward = 0
            state = self.env.reset()
            t = 0
            while True:
                t += 1
                action = self.act(state)
                next_state, reward, done, _ = self.env.step(action)
                episode_reward += reward
                if i_epiosde > 10:
                    self.learn()
                self.memory.add(state, reward, action, next_state, done)
                state = next_state
                if done:
                    scores_window.append(episode_reward)
                    break
            if i_epiosde % self.eval == 0:
                self.eval_policy()
            ave_reward = np.mean(scores_window)
            print("Epiosde {} Steps {} Reward {} Reward averge{:.2f} Time {}".
                  format(i_epiosde, t, episode_reward, np.mean(scores_window),
                         time_format(time.time() - t0)))
            self.writer.add_scalar('Aver_reward', ave_reward, self.steps)

    def learn(self):
        self.steps += 1
        states, rewards, actions, next_states, dones = self.memory.sample(
            self.batch_size)
        states = states.type(torch.float32).div_(255)
        states = self.encoder.create_vector(states)
        states_detached = states.detach()
        qf1, qf2 = self.critic(states)
        q_value1 = qf1.gather(1, actions)
        q_value2 = qf2.gather(1, actions)

        with torch.no_grad():
            next_states = next_states.type(torch.float32).div_(255)
            next_states = self.encoder.create_vector(next_states)
            q1_target, q2_target = self.target_critic(next_states)
            min_q_target = torch.min(q1_target, q2_target)
            next_action_prob, next_action_log_prob = self.policy(next_states)
            next_q_target = (
                next_action_prob *
                (min_q_target - self.alpha * next_action_log_prob)).sum(
                    dim=1, keepdim=True)
            next_q_value = rewards + (1 - dones) * self.gamma * next_q_target

        # --------------------------update-q--------------------------------------------------------
        loss = F.mse_loss(q_value1, next_q_value) + F.mse_loss(
            q_value2, next_q_value)
        self.q_optim.zero_grad()
        self.encoder_optimizer.zero_grad()
        loss.backward()
        self.q_optim.step()
        self.encoder_optimizer.step()  # step the encoder optimiser (was zero_grad) so the encoder is updated from the Q loss
        self.writer.add_scalar('loss/q', loss, self.steps)

        # --------------------------update-policy--------------------------------------------------------
        action_prob, log_action_prob = self.policy(states_detached)
        with torch.no_grad():
            q_pi1, q_pi2 = self.critic(states_detached)
            min_q_values = torch.min(q_pi1, q_pi2)
        #policy_loss = (action_prob *  ((self.alpha * log_action_prob) - min_q_values).detach()).sum(dim=1).mean()
        policy_loss = (action_prob *
                       ((self.alpha * log_action_prob) - min_q_values)).sum(
                           dim=1).mean()
        self.policy_optim.zero_grad()
        policy_loss.backward()
        self.policy_optim.step()
        self.writer.add_scalar('loss/policy', policy_loss, self.steps)

        # --------------------------update-alpha--------------------------------------------------------
        alpha_loss = (action_prob.detach() *
                      (-self.log_alpha *
                       (log_action_prob + self.target_entropy).detach())).sum(
                           dim=1).mean()
        self.alpha_optim.zero_grad()
        alpha_loss.backward()
        self.alpha_optim.step()
        self.writer.add_scalar('loss/alpha', alpha_loss, self.steps)
        self.soft_udapte(self.critic, self.target_critic)
        self.alpha = self.log_alpha.exp()

    def soft_udapte(self, online, target):
        for param, target_parm in zip(online.parameters(),
                                      target.parameters()):
            target_parm.data.copy_(self.tau * param.data +
                                   (1 - self.tau) * target_parm.data)

    def eval_policy(self, eval_episodes=4):
        env = gym.make(self.env_name)
        env = wrappers.Monitor(env,
                               str(self.vid_path) + "/{}".format(self.steps),
                               video_callable=lambda episode_id: True,
                               force=True)
        average_reward = 0
        scores_window = deque(maxlen=100)
        for i_epiosde in range(eval_episodes):
            print("Eval Episode {} of {} ".format(i_epiosde, eval_episodes))
            episode_reward = 0
            state = env.reset()
            while True:
                action = self.act(state, evaluate=True)
                state, reward, done, _ = env.step(action)
                episode_reward += reward
                if done:
                    break
            scores_window.append(episode_reward)
        average_reward = np.mean(scores_window)
        self.writer.add_scalar('Eval_reward', average_reward, self.steps)
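For reference, SACAgent.__init__ reads all of its hyperparameters from a config dictionary; a minimal sketch of such a dictionary is below. Only the keys are taken from the code above, and every value is an illustrative assumption:

config = {
    "seed": 0, "env_name": "PongNoFrameskip-v4", "tau": 0.005, "gamma": 0.99,
    "batch_size": 128, "lr": 1e-3, "history_length": 3, "size": 84,
    "device": "cuda", "eval": 10, "vid_path": "videos", "res_path": "results",
    "fc1_units": 256, "fc2_units": 256, "lr_critic": 1e-3, "lr_alpha": 3e-4,
    "lr_policy": 3e-4, "episodes": 1000, "buffer_size": 100000, "image_pad": 4,
}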
Code Example #8
class Plan(object):
    def __init__(self):

        self.results_dir = os.path.join(
            'results',
            '{}_seed_{}_{}_action_scale_{}_no_explore_{}_pool_len_{}_optimisation_iters_{}_top_planning-horizon'
            .format(args.env, args.seed, args.algo, args.action_scale,
                    args.pool_len, args.optimisation_iters,
                    args.top_planning_horizon))

        args.results_dir = self.results_dir
        args.MultiGPU = True if torch.cuda.device_count(
        ) > 1 and args.MultiGPU else False

        self.__basic_setting()
        self.__init_sample()  # Sample the initial data

        # Initialise model parameters randomly
        self.transition_model = TransitionModel(
            args.belief_size, args.state_size, self.env.action_size,
            args.hidden_size, args.embedding_size,
            args.dense_activation_function).to(device=args.device)
        self.observation_model = ObservationModel(
            args.symbolic_env, self.env.observation_size, args.belief_size,
            args.state_size, args.embedding_size,
            args.cnn_activation_function).to(device=args.device)
        self.reward_model = RewardModel(
            args.belief_size, args.state_size, args.hidden_size,
            args.dense_activation_function).to(device=args.device)
        self.encoder = Encoder(
            args.symbolic_env, self.env.observation_size, args.embedding_size,
            args.cnn_activation_function).to(device=args.device)

        print("We Have {} GPUS".format(torch.cuda.device_count())
              ) if args.MultiGPU else print("We use CPU")
        self.transition_model = nn.DataParallel(
            self.transition_model.to(device=args.device)
        ) if args.MultiGPU else self.transition_model
        self.observation_model = nn.DataParallel(
            self.observation_model.to(device=args.device)
        ) if args.MultiGPU else self.observation_model
        self.reward_model = nn.DataParallel(
            self.reward_model.to(
                device=args.device)) if args.MultiGPU else self.reward_model

        # encoder = nn.DataParallel(encoder.cuda())
        # actor_model = nn.DataParallel(actor_model.cuda())
        # value_model = nn.DataParallel(value_model.cuda())

        # share the global parameters in multiprocessing
        self.encoder.share_memory()
        self.observation_model.share_memory()
        self.reward_model.share_memory()

        # Set all_model/global_actor_optimizer/global_value_optimizer
        self.param_list = list(self.transition_model.parameters()) + list(
            self.observation_model.parameters()) + list(
                self.reward_model.parameters()) + list(
                    self.encoder.parameters())
        self.model_optimizer = optim.Adam(
            self.param_list,
            lr=0
            if args.learning_rate_schedule != 0 else args.model_learning_rate,
            eps=args.adam_epsilon)

    def update_belief_and_act(self,
                              args,
                              env,
                              belief,
                              posterior_state,
                              action,
                              observation,
                              explore=False):
        # Infer belief over current state q(s_t|o≤t,a<t) from the history
        # print("action size: ",action.size()) torch.Size([1, 6])
        belief, _, _, _, posterior_state, _, _ = self.upper_transition_model(
            posterior_state, action.unsqueeze(dim=0), belief,
            self.encoder(observation).unsqueeze(dim=0), None)
        if hasattr(env, "envs"):
            belief, posterior_state = list(
                map(lambda x: x.view(-1, args.test_episodes, x.shape[2]),
                    [x for x in [belief, posterior_state]]))

        belief, posterior_state = belief.squeeze(
            dim=0), posterior_state.squeeze(
                dim=0)  # Remove time dimension from belief/state
        action = self.algorithms.get_action(belief, posterior_state, explore)

        if explore:
            action = torch.clamp(
                Normal(action, args.action_noise).rsample(), -1, 1
            )  # Add gaussian exploration noise on top of the sampled action
            # action = action + args.action_noise * torch.randn_like(action)  # Add exploration noise ε ~ p(ε) to the action
        next_observation, reward, done = env.step(
            action.cpu() if isinstance(env, EnvBatcher) else action[0].cpu(
            ))  # Perform environment step (action repeats handled internally)
        return belief, posterior_state, action, next_observation, reward, done

    def run(self):
        if args.algo == "dreamer":
            print("DREAMER")
            from algorithms.dreamer import Algorithms
            self.algorithms = Algorithms(self.env.action_size,
                                         self.transition_model, self.encoder,
                                         self.reward_model,
                                         self.observation_model)
        elif args.algo == "p2p":
            print("planing to plan")
            from algorithms.plan_to_plan import Algorithms
            self.algorithms = Algorithms(self.env.action_size,
                                         self.transition_model, self.encoder,
                                         self.reward_model,
                                         self.observation_model)
        elif args.algo == "actor_pool_1":
            print("async sub actor")
            from algorithms.actor_pool_1 import Algorithms_actor
            self.algorithms = Algorithms_actor(self.env.action_size,
                                               self.transition_model,
                                               self.encoder, self.reward_model,
                                               self.observation_model)
        elif args.algo == "aap":
            from algorithms.asynchronous_actor_planet import Algorithms
            self.algorithms = Algorithms(self.env.action_size,
                                         self.transition_model, self.encoder,
                                         self.reward_model,
                                         self.observation_model)
        else:
            print("planet")
            from algorithms.planet import Algorithms
            # args.MultiGPU = False
            self.algorithms = Algorithms(self.env.action_size,
                                         self.transition_model,
                                         self.reward_model)

        if args.test: self.test_only()

        self.global_prior = Normal(
            torch.zeros(args.batch_size, args.state_size, device=args.device),
            torch.ones(args.batch_size, args.state_size,
                       device=args.device))  # Global prior N(0, I)
        self.free_nats = torch.full(
            (1, ), args.free_nats,
            device=args.device)  # Allowed deviation in KL divergence

        # Training (and testing)
        # args.episodes = 1
        for episode in tqdm(range(self.metrics['episodes'][-1] + 1,
                                  args.episodes + 1),
                            total=args.episodes,
                            initial=self.metrics['episodes'][-1] + 1):
            losses = self.train()
            # self.algorithms.save_loss_data(self.metrics['episodes']) # Update and plot loss metrics
            self.save_loss_data(tuple(
                zip(*losses)))  # Update and plot loss metrics
            self.data_collection(episode=episode)  # Data collection
            # args.test_interval = 1
            if episode % args.test_interval == 0:
                self.test(episode=episode)  # Test model
            self.save_model_data(episode=episode)  # save model

        self.env.close()  # Close training environment

    def train_env_model(self, beliefs, prior_states, prior_means,
                        prior_std_devs, posterior_states, posterior_means,
                        posterior_std_devs, observations, actions, rewards,
                        nonterminals):
        # Calculate observation likelihood, reward likelihood and KL losses (for t = 0 only for latent overshooting); sum over final dims, average over batch and time (original implementation, though paper seems to miss 1/T scaling?)
        if args.worldmodel_LogProbLoss:
            observation_dist = Normal(
                bottle(self.observation_model, (beliefs, posterior_states)), 1)
            observation_loss = -observation_dist.log_prob(
                observations[1:]).sum(
                    dim=2 if args.symbolic_env else (2, 3, 4)).mean(dim=(0, 1))
        else:
            observation_loss = F.mse_loss(
                bottle(self.observation_model, (beliefs, posterior_states)),
                observations[1:],
                reduction='none').sum(
                    dim=2 if args.symbolic_env else (2, 3, 4)).mean(dim=(0, 1))
        if args.worldmodel_LogProbLoss:
            reward_dist = Normal(
                bottle(self.reward_model, (beliefs, posterior_states)), 1)
            reward_loss = -reward_dist.log_prob(rewards[:-1]).mean(dim=(0, 1))
        else:
            reward_loss = F.mse_loss(bottle(self.reward_model,
                                            (beliefs, posterior_states)),
                                     rewards[:-1],
                                     reduction='none').mean(dim=(0, 1))

        # transition loss
        div = kl_divergence(Normal(posterior_means, posterior_std_devs),
                            Normal(prior_means, prior_std_devs)).sum(dim=2)
        kl_loss = torch.max(div, self.free_nats).mean(
            dim=(0, 1)
        )  # Note that normalisation by overshooting distance and weighting by overshooting distance cancel out
        if args.global_kl_beta != 0:
            kl_loss += args.global_kl_beta * kl_divergence(
                Normal(posterior_means, posterior_std_devs),
                self.global_prior).sum(dim=2).mean(dim=(0, 1))
        # Calculate latent overshooting objective for t > 0
        if args.overshooting_kl_beta != 0:
            overshooting_vars = [
            ]  # Collect variables for overshooting to process in batch
            for t in range(1, args.chunk_size - 1):
                d = min(t + args.overshooting_distance,
                        args.chunk_size - 1)  # Overshooting distance
                t_, d_ = t - 1, d - 1  # Use t_ and d_ to deal with different time indexing for latent states
                seq_pad = (
                    0, 0, 0, 0, 0, t - d + args.overshooting_distance
                )  # Calculate sequence padding so overshooting terms can be calculated in one batch
                # Store (0) actions, (1) nonterminals, (2) rewards, (3) beliefs, (4) prior states, (5) posterior means, (6) posterior standard deviations and (7) sequence masks
                overshooting_vars.append(
                    (F.pad(actions[t:d],
                           seq_pad), F.pad(nonterminals[t:d], seq_pad),
                     F.pad(rewards[t:d],
                           seq_pad[2:]), beliefs[t_], prior_states[t_],
                     F.pad(posterior_means[t_ + 1:d_ + 1].detach(), seq_pad),
                     F.pad(posterior_std_devs[t_ + 1:d_ + 1].detach(),
                           seq_pad,
                           value=1),
                     F.pad(
                         torch.ones(d - t,
                                    args.batch_size,
                                    args.state_size,
                                    device=args.device), seq_pad))
                )  # Posterior standard deviations must be padded with > 0 to prevent infinite KL divergences
            overshooting_vars = tuple(zip(*overshooting_vars))
            # Update belief/state using prior from previous belief/state and previous action (over entire sequence at once)
            beliefs, prior_states, prior_means, prior_std_devs = self.upper_transition_model(
                torch.cat(overshooting_vars[4], dim=0),
                torch.cat(overshooting_vars[0], dim=1),
                torch.cat(overshooting_vars[3], dim=0), None,
                torch.cat(overshooting_vars[1], dim=1))
            seq_mask = torch.cat(overshooting_vars[7], dim=1)
            # Calculate overshooting KL loss with sequence mask
            kl_loss += (
                1 / args.overshooting_distance
            ) * args.overshooting_kl_beta * torch.max((kl_divergence(
                Normal(torch.cat(overshooting_vars[5], dim=1),
                       torch.cat(overshooting_vars[6], dim=1)),
                Normal(prior_means, prior_std_devs)
            ) * seq_mask).sum(dim=2), self.free_nats).mean(dim=(0, 1)) * (
                args.chunk_size
                - 1
            )  # Update KL loss (compensating for extra average over each overshooting/open loop sequence)
            # Calculate overshooting reward prediction loss with sequence mask
            if args.overshooting_reward_scale != 0:
                reward_loss += (
                    1 / args.overshooting_distance
                ) * args.overshooting_reward_scale * F.mse_loss(
                    bottle(self.reward_model,
                           (beliefs, prior_states)) * seq_mask[:, :, 0],
                    torch.cat(overshooting_vars[2], dim=1),
                    reduction='none'
                ).mean(dim=(0, 1)) * (
                    args.chunk_size - 1
                )  # Update reward loss (compensating for extra average over each overshooting/open loop sequence)
        # Apply linearly ramping learning rate schedule
        if args.learning_rate_schedule != 0:
            for group in self.model_optimizer.param_groups:
                group['lr'] = min(
                    group['lr'] + args.model_learning_rate /
                    args.model_learning_rate_schedule,
                    args.model_learning_rate)
        model_loss = observation_loss + reward_loss + kl_loss
        # Update model parameters
        self.model_optimizer.zero_grad()
        model_loss.backward()
        nn.utils.clip_grad_norm_(self.param_list,
                                 args.grad_clip_norm,
                                 norm_type=2)
        self.model_optimizer.step()
        return observation_loss, reward_loss, kl_loss

    def train(self):
        # Model fitting
        losses = []
        print("training loop")
        # args.collect_interval = 1
        for s in tqdm(range(args.collect_interval)):

            # Draw sequence chunks {(o_t, a_t, r_t+1, terminal_t+1)} ~ D uniformly at random from the dataset (including terminal flags)
            observations, actions, rewards, nonterminals = self.D.sample(
                args.batch_size,
                args.chunk_size)  # Transitions start at time t = 0
            # Create initial belief and state for time t = 0
            init_belief, init_state = torch.zeros(
                args.batch_size, args.belief_size,
                device=args.device), torch.zeros(args.batch_size,
                                                 args.state_size,
                                                 device=args.device)
            # Update belief/state using posterior from previous belief/state, previous action and current observation (over entire sequence at once)
            obs = bottle(self.encoder, (observations[1:], ))
            beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs = self.upper_transition_model(
                prev_state=init_state,
                actions=actions[:-1],
                prev_belief=init_belief,
                obs=obs,
                nonterminals=nonterminals[:-1])

            # Calculate observation likelihood, reward likelihood and KL losses (for t = 0 only for latent overshooting); sum over final dims, average over batch and time (original implementation, though paper seems to miss 1/T scaling?)
            observation_loss, reward_loss, kl_loss = self.train_env_model(
                beliefs, prior_states, prior_means, prior_std_devs,
                posterior_states, posterior_means, posterior_std_devs,
                observations, actions, rewards, nonterminals)

            # Dreamer implementation: actor loss calculation and optimization
            with torch.no_grad():
                actor_states = posterior_states.detach().to(
                    device=args.device).share_memory_()
                actor_beliefs = beliefs.detach().to(
                    device=args.device).share_memory_()

            # if not os.path.exists(os.path.join(os.getcwd(), 'tensor_data/' + args.results_dir)): os.mkdir(os.path.join(os.getcwd(), 'tensor_data/' + args.results_dir))
            torch.save(
                actor_states,
                os.path.join(os.getcwd(),
                             args.results_dir + '/actor_states.pt'))
            torch.save(
                actor_beliefs,
                os.path.join(os.getcwd(),
                             args.results_dir + '/actor_beliefs.pt'))

            # [self.actor_pipes[i][0].send(1) for i, w in enumerate(self.workers_actor)]  # Parent_pipe send data using i'th pipes
            # [self.actor_pipes[i][0].recv() for i, _ in enumerate(self.actor_pool)]  # waitting the children finish

            self.algorithms.train_algorithm(actor_states, actor_beliefs)
            losses.append(
                [observation_loss.item(),
                 reward_loss.item(),
                 kl_loss.item()])

            # if self.algorithms.train_algorithm(actor_states, actor_beliefs) is not None:
            #   merge_actor_loss, merge_value_loss = self.algorithms.train_algorithm(actor_states, actor_beliefs)
            #   losses.append([observation_loss.item(), reward_loss.item(), kl_loss.item(), merge_actor_loss.item(), merge_value_loss.item()])
            # else:
            #   losses.append([observation_loss.item(), reward_loss.item(), kl_loss.item()])

        return losses

    def data_collection(self, episode):
        print("Data collection")
        with torch.no_grad():
            observation, total_reward = self.env.reset(), 0
            belief, posterior_state, action = torch.zeros(
                1, args.belief_size, device=args.device), torch.zeros(
                    1, args.state_size,
                    device=args.device), torch.zeros(1,
                                                     self.env.action_size,
                                                     device=args.device)
            pbar = tqdm(range(args.max_episode_length // args.action_repeat))
            for t in pbar:
                # print("step",t)
                belief, posterior_state, action, next_observation, reward, done = self.update_belief_and_act(
                    args, self.env, belief, posterior_state, action,
                    observation.to(device=args.device))
                self.D.append(observation, action.cpu(), reward, done)
                total_reward += reward
                observation = next_observation
                if args.render: self.env.render()
                if done:
                    pbar.close()
                    break

            # Update and plot train reward metrics
            self.metrics['steps'].append(t + self.metrics['steps'][-1])
            self.metrics['episodes'].append(episode)
            self.metrics['train_rewards'].append(total_reward)

            Save_Txt(self.metrics['episodes'][-1],
                     self.metrics['train_rewards'][-1], 'train_rewards',
                     args.results_dir)
            # lineplot(metrics['episodes'][-len(metrics['train_rewards']):], metrics['train_rewards'], 'train_rewards', results_dir)

    def test(self, episode):
        print("Test model")
        # Set models to eval mode
        self.transition_model.eval()
        self.observation_model.eval()
        self.reward_model.eval()
        self.encoder.eval()
        self.algorithms.train_to_eval()
        # self.actor_model_g.eval()
        # self.value_model_g.eval()
        # Initialise parallelised test environments
        test_envs = EnvBatcher(
            Env, (args.env, args.symbolic_env, args.seed,
                  args.max_episode_length, args.action_repeat, args.bit_depth),
            {}, args.test_episodes)

        with torch.no_grad():
            observation, total_rewards, video_frames = test_envs.reset(
            ), np.zeros((args.test_episodes, )), []
            belief, posterior_state, action = torch.zeros(
                args.test_episodes, args.belief_size,
                device=args.device), torch.zeros(
                    args.test_episodes, args.state_size,
                    device=args.device), torch.zeros(args.test_episodes,
                                                     self.env.action_size,
                                                     device=args.device)
            pbar = tqdm(range(args.max_episode_length // args.action_repeat))
            for t in pbar:
                belief, posterior_state, action, next_observation, reward, done = self.update_belief_and_act(
                    args, test_envs, belief, posterior_state, action,
                    observation.to(device=args.device))
                total_rewards += reward.numpy()
                if not args.symbolic_env:  # Collect real vs. predicted frames for video
                    video_frames.append(
                        make_grid(torch.cat([
                            observation,
                            self.observation_model(belief,
                                                   posterior_state).cpu()
                        ],
                                            dim=3) + 0.5,
                                  nrow=5).numpy())  # Decentre
                observation = next_observation
                if done.sum().item() == args.test_episodes:
                    pbar.close()
                    break

        # Update and plot reward metrics (and write video if applicable) and save metrics
        self.metrics['test_episodes'].append(episode)
        self.metrics['test_rewards'].append(total_rewards.tolist())

        Save_Txt(self.metrics['test_episodes'][-1],
                 self.metrics['test_rewards'][-1], 'test_rewards',
                 args.results_dir)
        # Save_Txt(np.asarray(metrics['steps'])[np.asarray(metrics['test_episodes']) - 1], metrics['test_rewards'],'test_rewards_steps', results_dir, xaxis='step')

        # lineplot(metrics['test_episodes'], metrics['test_rewards'], 'test_rewards', results_dir)
        # lineplot(np.asarray(metrics['steps'])[np.asarray(metrics['test_episodes']) - 1], metrics['test_rewards'], 'test_rewards_steps', results_dir, xaxis='step')
        if not args.symbolic_env:
            episode_str = str(episode).zfill(len(str(args.episodes)))
            write_video(video_frames, 'test_episode_%s' % episode_str,
                        args.results_dir)  # Lossy compression
            save_image(
                torch.as_tensor(video_frames[-1]),
                os.path.join(args.results_dir,
                             'test_episode_%s.png' % episode_str))

        torch.save(self.metrics, os.path.join(args.results_dir, 'metrics.pth'))

        # Set models to train mode
        self.transition_model.train()
        self.observation_model.train()
        self.reward_model.train()
        self.encoder.train()
        # self.actor_model_g.train()
        # self.value_model_g.train()
        self.algorithms.eval_to_train()
        # Close test environments
        test_envs.close()

    def test_only(self):
        # Set models to eval mode
        self.transition_model.eval()
        self.reward_model.eval()
        self.encoder.eval()
        with torch.no_grad():
            total_reward = 0
            for _ in tqdm(range(args.test_episodes)):
                observation = self.env.reset()
                belief, posterior_state, action = torch.zeros(
                    1, args.belief_size, device=args.device), torch.zeros(
                        1, args.state_size,
                        device=args.device), torch.zeros(1,
                                                         self.env.action_size,
                                                         device=args.device)
                pbar = tqdm(
                    range(args.max_episode_length // args.action_repeat))
                for t in pbar:
                    belief, posterior_state, action, observation, reward, done = self.update_belief_and_act(
                        args, self.env, belief, posterior_state, action,
                        observation.to(device=args.device))
                    total_reward += reward
                    if args.render: self.env.render()
                    if done:
                        pbar.close()
                        break
        print('Average Reward:', total_reward / args.test_episodes)
        self.env.close()
        quit()

    def __basic_setting(self):
        args.overshooting_distance = min(
            args.chunk_size, args.overshooting_distance
        )  # Overshooting distance cannot be greater than chunk size
        print(' ' * 26 + 'Options')
        for k, v in vars(args).items():
            print(' ' * 26 + k + ': ' + str(v))

        print("torch.cuda.device_count() {}".format(torch.cuda.device_count()))
        os.makedirs(args.results_dir, exist_ok=True)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        # Set Cuda
        if torch.cuda.is_available() and not args.disable_cuda:
            print("using CUDA")
            args.device = torch.device('cuda')
            torch.cuda.manual_seed(args.seed)
        else:
            print("using CPU")
            args.device = torch.device('cpu')

        self.summary_name = args.results_dir + "/{}_{}_log"
        self.writer = SummaryWriter(self.summary_name.format(
            args.env, args.id))
        self.env = Env(args.env, args.symbolic_env, args.seed,
                       args.max_episode_length, args.action_repeat,
                       args.bit_depth)
        self.metrics = {
            'steps': [],
            'episodes': [],
            'train_rewards': [],
            'test_episodes': [],
            'test_rewards': [],
            'observation_loss': [],
            'reward_loss': [],
            'kl_loss': [],
            'merge_actor_loss': [],
            'merge_value_loss': []
        }

    def __init_sample(self):
        if args.experience_replay != '' and os.path.exists(
                args.experience_replay):
            self.D = torch.load(args.experience_replay)
            self.metrics['steps'], self.metrics['episodes'] = [
                self.D.steps
            ] * self.D.episodes, list(range(1, self.D.episodes + 1))
        elif not args.test:
            self.D = ExperienceReplay(args.experience_size, args.symbolic_env,
                                      self.env.observation_size,
                                      self.env.action_size, args.bit_depth,
                                      args.device)

            # Initialise dataset D with S random seed episodes
            print(
                "Start Multi Sample Processing -------------------------------"
            )
            start_time = time.time()
            data_lists = [
                Manager().list() for i in range(1, args.seed_episodes + 1)
            ]  # Set Global Lists
            pipes = [Pipe() for i in range(1, args.seed_episodes + 1)
                     ]  # Set Multi Pipe
            workers_init_sample = [
                Worker_init_Sample(child_conn=child, id=i + 1)
                for i, [parent, child] in enumerate(pipes)
            ]

            for i, w in enumerate(workers_init_sample):
                w.start()  # Start Single Process
                pipes[i][0].send(
                    data_lists[i])  # parent end sends the shared list through the i-th pipe
            [w.join() for w in workers_init_sample]  # wait for all sub-processes to finish

            for i, [parent, child] in enumerate(pipes):
                # datas = parent.recv()
                for data in list(parent.recv()):
                    if isinstance(data, tuple):
                        assert len(data) == 4
                        self.D.append(data[0], data[1], data[2], data[3])
                    elif isinstance(data, int):
                        t = data
                        self.metrics['steps'].append(t * args.action_repeat + (
                            0 if len(self.metrics['steps']) ==
                            0 else self.metrics['steps'][-1]))
                        self.metrics['episodes'].append(i + 1)
                    else:
                        print(
                            "The Recvive Data Have Some Problems, Need To Fix")
            end_time = time.time()
            print("the process times {} s".format(end_time - start_time))
            print(
                "End Multi Sample Processing -------------------------------")

    def upper_transition_model(self, prev_state, actions, prev_belief, obs,
                               nonterminals):
        # With multiple GPUs, put the batch dimension first so that
        # nn.DataParallel can scatter the inputs across devices
        actions = torch.transpose(actions, 0, 1) if args.MultiGPU else actions
        nonterminals = torch.transpose(nonterminals, 0, 1).to(
            device=args.device
        ) if args.MultiGPU and nonterminals is not None else nonterminals
        obs = torch.transpose(obs, 0, 1).to(
            device=args.device) if args.MultiGPU and obs is not None else obs
        temp_val = self.transition_model(prev_state.to(device=args.device),
                                         actions.to(device=args.device),
                                         prev_belief.to(device=args.device),
                                         obs, nonterminals)

        # Reassemble the per-GPU output chunks back into the single-device
        # layout whenever an output was split along the batch dimension
        return list(
            map(
                lambda x: torch.cat(x.chunk(torch.cuda.device_count(), 0), 1)
                if x.shape[1] != prev_state.shape[0] else x,
                [x for x in temp_val]))

    def save_loss_data(self, losses):
        self.metrics['observation_loss'].append(losses[0])
        self.metrics['reward_loss'].append(losses[1])
        self.metrics['kl_loss'].append(losses[2])
        if len(losses) > 3:
            self.metrics['merge_actor_loss'].append(losses[3])
            self.metrics['merge_value_loss'].append(losses[4])

        Save_Txt(self.metrics['episodes'][-1],
                 self.metrics['observation_loss'][-1], 'observation_loss',
                 args.results_dir)
        Save_Txt(self.metrics['episodes'][-1], self.metrics['reward_loss'][-1],
                 'reward_loss', args.results_dir)
        Save_Txt(self.metrics['episodes'][-1], self.metrics['kl_loss'][-1],
                 'kl_loss', args.results_dir)
        if len(losses) > 3:
            Save_Txt(self.metrics['episodes'][-1],
                     self.metrics['merge_actor_loss'][-1], 'merge_actor_loss',
                     args.results_dir)
            Save_Txt(self.metrics['episodes'][-1],
                     self.metrics['merge_value_loss'][-1], 'merge_value_loss',
                     args.results_dir)

        # lineplot(metrics['episodes'][-len(metrics['observation_loss']):], metrics['observation_loss'], 'observation_loss', results_dir)
        # lineplot(metrics['episodes'][-len(metrics['reward_loss']):], metrics['reward_loss'], 'reward_loss', results_dir)
        # lineplot(metrics['episodes'][-len(metrics['kl_loss']):], metrics['kl_loss'], 'kl_loss', results_dir)
        # lineplot(metrics['episodes'][-len(metrics['actor_loss']):], metrics['actor_loss'], 'actor_loss', results_dir)
        # lineplot(metrics['episodes'][-len(metrics['value_loss']):], metrics['value_loss'], 'value_loss', results_dir)

    def save_model_data(self, episode):
        # writer.add_scalar("train_reward", metrics['train_rewards'][-1], metrics['steps'][-1])
        # writer.add_scalar("train/episode_reward", metrics['train_rewards'][-1], metrics['steps'][-1]*args.action_repeat)
        # writer.add_scalar("observation_loss", metrics['observation_loss'][0][-1], metrics['steps'][-1])
        # writer.add_scalar("reward_loss", metrics['reward_loss'][0][-1], metrics['steps'][-1])
        # writer.add_scalar("kl_loss", metrics['kl_loss'][0][-1], metrics['steps'][-1])
        # writer.add_scalar("actor_loss", metrics['actor_loss'][0][-1], metrics['steps'][-1])
        # writer.add_scalar("value_loss", metrics['value_loss'][0][-1], metrics['steps'][-1])
        # print("episodes: {}, total_steps: {}, train_reward: {} ".format(metrics['episodes'][-1], metrics['steps'][-1], metrics['train_rewards'][-1]))

        # Checkpoint models
        if episode % args.checkpoint_interval == 0:
            # torch.save({'transition_model': transition_model.state_dict(),
            #             'observation_model': observation_model.state_dict(),
            #             'reward_model': reward_model.state_dict(),
            #             'encoder': encoder.state_dict(),
            #             'actor_model': actor_model_g.state_dict(),
            #             'value_model': value_model_g.state_dict(),
            #             'model_optimizer': model_optimizer.state_dict(),
            #             'actor_optimizer': actor_optimizer_g.state_dict(),
            #             'value_optimizer': value_optimizer_g.state_dict()
            #             }, os.path.join(results_dir, 'models_%d.pth' % episode))
            if args.checkpoint_experience:
                torch.save(
                    self.D, os.path.join(args.results_dir, 'experience.pth')
                )  # Warning: will fail with MemoryError with large memory sizes
コード例 #9
0
class DQNAgent():
    """Interacts with and learns from the environment."""
    def __init__(self, state_size, action_size, config):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(config["seed"])
        self.seed = config["seed"]
        self.gamma = 0.99
        self.batch_size = config["batch_size"]
        self.lr = config["lr"]
        self.tau = config["tau"]
        self.fc1 = config["fc1_units"]
        self.fc2 = config["fc2_units"]
        self.device = config["device"]
        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size, self.fc1,
                                       self.fc2, self.seed).to(self.device)
        self.qnetwork_target = QNetwork(state_size, action_size, self.fc1,
                                        self.fc2, self.seed).to(self.device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                    lr=self.lr)
        self.encoder = Encoder(config).to(self.device)
        self.encoder_optimizer = torch.optim.Adam(self.encoder.parameters(),
                                                  self.lr)

        # Replay memory

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

    def step(self, memory, writer):
        self.t_step += 1
        if len(memory) > self.batch_size:
            if self.t_step % 4 == 0:
                experiences = memory.sample(self.batch_size)
                self.learn(experiences, writer)

    def act(self, state, eps=0.):
        """Returns actions for given state as per current policy.
        
        Params
        ======
            state (array_like): current state
            eps (float): epsilon, for epsilon-greedy action selection
        """
        state = torch.from_numpy(state).float().unsqueeze(0).to(self.device)
        state = state.type(torch.float32).div_(255)
        self.qnetwork_local.eval()
        self.encoder.eval()
        with torch.no_grad():
            state = self.encoder.create_vector(state)
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()
        self.encoder.train()

        # Epsilon-greedy action selection
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def learn(self, experiences, writer):
        """Update value parameters using given batch of experience tuples.
        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples 
            gamma (float): discount factor
        """
        states, actions, rewards, next_states, dones = experiences
        states = states.type(torch.float32).div_(255)
        states = self.encoder.create_vector(states)
        next_states = next_states.type(torch.float32).div_(255)
        next_states = self.encoder.create_vector(next_states)
        actions = actions.type(torch.int64)
        # Get max predicted Q values (for next states) from target model
        Q_targets_next = self.qnetwork_target(next_states).detach().max(
            1)[0].unsqueeze(1)
        # Compute Q targets for current states (note: this assumes `dones` already
        # holds a (1 - done) mask; the textbook form would use `* (1 - dones)` here)
        Q_targets = rewards + (self.gamma * Q_targets_next * dones)

        # Get expected Q values from local model
        Q_expected = self.qnetwork_local(states).gather(1, actions)

        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)
        writer.add_scalar('Q_loss', loss, self.t_step)
        # Minimize the loss
        self.optimizer.zero_grad()
        self.encoder_optimizer.zero_grad()

        loss.backward()
        self.optimizer.step()
        self.encoder_optimizer.step()

        # ------------------- update target network ------------------- #
        self.soft_update(self.qnetwork_local, self.qnetwork_target)

    def soft_update(self, local_model, target_model):
        """Soft update model parameters.
        θ_target = τ*θ_local + (1 - τ)*θ_target
        Params
        ======
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
            tau (float): interpolation parameter 
        """
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(self.tau * local_param.data +
                                    (1.0 - self.tau) * target_param.data)

    def save(self, filename):
        """
        """
        mkdir("", filename)
        torch.save(self.qnetwork_local.state_dict(), filename + "_q_net.pth")
        torch.save(self.optimizer.state_dict(),
                   filename + "_q_net_optimizer.pth")
        torch.save(self.encoder.state_dict(), filename + "_encoder.pth")
        torch.save(self.encoder_optimizer.state_dict(),
                   filename + "_encoder_optimizer.pth")
        print("Save models to {}".format(filename))
コード例 #10
0
ファイル: main.py プロジェクト: rmrafailov/Memory-PlaNet
        # Update and plot train reward metrics
        metrics['steps'].append(t + metrics['steps'][-1])
        metrics['episodes'].append(episode)
        metrics['train_rewards'].append(total_reward)
        lineplot(metrics['episodes'][-len(metrics['train_rewards']):],
                 metrics['train_rewards'], 'train_rewards', results_dir)

    print("Testing!")
    # Test model
    if episode % args.test_interval == 0:
        # Set models to eval mode
        transition_model.eval()
        observation_model.eval()
        reward_model.eval()
        encoder.eval()
        # Initialise parallelised test environments
        test_envs = EnvBatcher(
            Env, (args.env, args.symbolic_env, args.seed,
                  args.max_episode_length, args.action_repeat, args.bit_depth),
            {}, args.test_episodes)

        with torch.no_grad():
            observation, total_rewards, video_frames = test_envs.reset(
            ), np.zeros((args.test_episodes, )), []
            belief, posterior_state, action = torch.zeros(
                args.test_episodes, args.belief_size,
                device=args.device), torch.zeros(
                    args.test_episodes, args.state_size,
                    device=args.device), torch.zeros(args.test_episodes,
                                                     env.action_size,
コード例 #11
0
ファイル: solver.py プロジェクト: BorisAnimal/TS-UDA
class Solver:
    def __init__(self):
        self.train_lr = 1e-4
        self.num_classes = 9
        self.clf_target = Classifier().cuda()
        self.clf2 = Classifier().cuda()
        self.clf1 = Classifier().cuda()
        self.encoder = Encoder().cuda()
        self.pretrain_lr = 1e-4
        self.weights_coef = 1e-3

    def to_var(self, x):
        """Converts numpy to variable."""
        if torch.cuda.is_available():
            x = x.cuda()
        return Variable(x, requires_grad=False).float()

    def loss(self, predictions, y_true, weights_coef=None):
        """
        :param predictions: list of prediction tensors
        """
        assert len(predictions[0].shape) == 2 and len(
            y_true.shape) == 1, (predictions.shape, y_true.shape)
        losses = [F.cross_entropy(y_hat, y_true) for y_hat in predictions]
        loss = sum(losses)

        # """
        # We add the term |W1^T W2| to the cost function, where W1, W2 denote fully connected layers’
        # weights of F1 and F2 which are first applied to the feature F(xi)
        # """
        if weights_coef:
            lw = torch.matmul(self.clf1.fc1.weight,
                              self.clf2.fc1.weight.T).abs().sum().mean()
            loss += weights_coef * lw

        return loss

    def pretrain(self, source_loader, target_val_loader, pretrain_epochs=1):
        source_iter = iter(source_loader)
        source_per_epoch = len(source_iter)
        print("source_per_epoch:", source_per_epoch)

        # pretrain
        log_pre = 250
        lr = self.pretrain_lr
        pretrain_iters = source_per_epoch * pretrain_epochs
        params = reduce(
            lambda a, b: a + b,
            map(lambda i: list(i.parameters()),
                [self.encoder, self.clf1, self.clf2, self.clf_target]))
        pretrain_optimizer = optim.Adam(params, lr)
        accuracies = []

        for step in range(pretrain_iters + 1):
            # ============ Initialization ============#
            # refresh
            if (step + 1) % source_per_epoch == 0:
                source_iter = iter(source_loader)
            # load the data
            source, s_labels = next(source_iter)
            source, s_labels = self.to_var(source), self.to_var(
                s_labels).long().squeeze()

            # ============ Training ============ #
            pretrain_optimizer.zero_grad()
            # forward
            features = self.encoder(source)
            y1_hat = self.clf1(features)
            y2_hat = self.clf2(features)
            y_target_hat = self.clf_target(features)

            # loss
            loss_source_class = self.loss([y1_hat, y2_hat, y_target_hat],
                                          s_labels,
                                          weights_coef=self.weights_coef)

            # one step
            loss_source_class.backward()
            pretrain_optimizer.step()
            pretrain_optimizer.zero_grad()
            # TODO: make this each step and on log_pre step just average and print previous results
            # ============ Validation ============ #
            if (step + 1) % log_pre == 0:
                with torch.no_grad():
                    source_val_features = self.encoder(source)
                    c_source1 = self.clf1(source_val_features)
                    c_source2 = self.clf2(source_val_features)
                    c_target = self.clf_target(source_val_features)
                    print("Train data (source) scores:")
                    print("Step %d | Source clf1=%.2f, clf2=%.2f | Source data clf_t=%.2f" \
                          % (step,
                             accuracy(c_source1, s_labels),
                             accuracy(c_source2, s_labels),
                             accuracy(c_target, s_labels))
                          )
                    acc = self.eval(target_val_loader, self.clf_target)
                    print("Val target data acc=%.2f" % acc)
                    print()

    def pseudo_labeling(self, loader, pool_size=4000, threshold=0.9):
        """
        When C1, C2 denote the class which has the maximum predicted probability for
        y1, y2, we assign a pseudo-label to xk if the following two
        conditions are satisfied. First, we require C1 = C2 to give
        pseudo-labels, which means two different classifiers agree
        with the prediction. The second requirement is that the
        maximizing probability of y1 or y2 exceeds the threshold
        parameter, which we set as 0.9 or 0.95 in the experiment.

        :return:
        """
        pool = []  # x, y_pseudo
        for x, _ in loader:
            batch_size = x.shape[0]
            x = self.to_var(x)
            ys1 = F.softmax(self.clf1(self.encoder(x)), dim=1)
            ys2 = F.softmax(self.clf2(self.encoder(x)), dim=1)
            # _, pseudo_labels = torch.max(pseudo_labels, 1)
            for i in range(batch_size):
                y1 = ys1[i]
                y2 = ys2[i]
                val1, idx1 = torch.max(y1, 0)
                val2, idx2 = torch.max(y2, 0)
                if idx1 == idx2 and max(val1, val2) >= threshold:
                    pool.append((x[i].cpu(), idx1.cpu().item()))
                if len(pool) >= pool_size:
                    return pool
        return pool

    def train(self, source_loader, source_val_loader, target_loader,
              target_val_loader, epochs):
        """
        :param epochs: target epochs the training will be done
        """

        # pretrain
        log_pre = 30
        lr = self.train_lr

        params1 = reduce(
            lambda a, b: a + b,
            map(lambda i: list(i.parameters()),
                [self.encoder, self.clf1, self.clf2]))
        params2 = list(self.encoder.parameters()) + list(
            self.clf_target.parameters())
        optimizer1 = optim.Adam(params1, lr)
        optimizer2 = optim.Adam(params2, lr)

        # ad-hoc
        acs1 = []
        acs2 = []
        acs3 = []

        for epoch in range(epochs):
            source_iter = iter(source_loader)
            target_iter = iter(target_loader)

            source_per_epoch = len(source_iter)
            target_per_epoch = len(target_iter)
            if epoch == 0:
                print("source_per_epoch, target_per_epoch:", source_per_epoch,
                      target_per_epoch)
            if epoch == 3:
                for param_group in optimizer1.param_groups:
                    param_group['lr'] = lr * 0.1

                for param_group in optimizer2.param_groups:
                    param_group['lr'] = lr * 0.1
            if epoch == 6:
                for param_group in optimizer1.param_groups:
                    param_group['lr'] = lr * 0.01

                for param_group in optimizer2.param_groups:
                    param_group['lr'] = lr * 0.01

            # ============ Pseudo-labeling  ============ #
            # Fill candidates
            target_candidates = self.pseudo_labeling(target_loader,
                                                     pool_size=4000 * epoch)
            print("Target candidates len:", len(target_candidates))
            if len(target_candidates) <= 1:
                target_candidates = self.pseudo_labeling(target_loader,
                                                         threshold=0.0)
                print("Target candidates len:", len(target_candidates))
            target_candidates_loader = self.wrap_to_loader(
                target_candidates, batch_size=target_loader.batch_size)
            for step, (target,
                       t_labels) in enumerate(target_candidates_loader):
                if (step + 1) % source_per_epoch == 0:
                    source_iter = iter(source_loader)

                source, s_labels = next(source_iter)
                target, t_labels = self.to_var(target), self.to_var(
                    t_labels).long().squeeze()
                source, s_labels = self.to_var(source), self.to_var(
                    s_labels).long().squeeze()

                # ============ Train F, F1, F2  ============ #
                optimizer1.zero_grad()
                # Source data
                # forward
                features = self.encoder(source)
                y1s_hat = self.clf1(features)
                y2s_hat = self.clf2(features)
                # loss
                loss_source_class = self.loss([y1s_hat, y2s_hat],
                                              s_labels,
                                              weights_coef=self.weights_coef)

                # Target data
                # forward
                features = self.encoder(target)
                y1t_hat = self.clf1(features)
                y2t_hat = self.clf2(features)
                # loss
                loss_target_class = self.loss([y1t_hat, y2t_hat],
                                              t_labels,
                                              weights_coef=self.weights_coef)
                # one step
                (loss_source_class + loss_target_class).backward()
                optimizer1.step()
                optimizer1.zero_grad()

                # ============ Train F, Ft  ============ #
                optimizer2.zero_grad()
                # Target data
                # forward
                y_target_hat = self.clf_target(self.encoder(target))
                # loss
                loss_target_class = self.loss([y_target_hat], t_labels)
                # one step
                loss_target_class.backward()
                optimizer2.step()
                optimizer2.zero_grad()

                # ============ Validation ============ #
                acs1.append(accuracy(y1s_hat, s_labels).item())
                acs2.append(accuracy(y2s_hat, s_labels).item())
                acs3.append(accuracy(y_target_hat, t_labels).item())

                if (step + 1) % log_pre == 0:
                    acc = self.eval(target_val_loader, self.clf_target)
                    print("Step %d | Val data target classifier acc=%.2f" %
                          (step, acc))
                    print(
                        "          Train accuracy clf1=%.2f, clf2=%.2f, clf_t=%.2f"
                        % (np.mean(acs1), np.mean(acs2), np.mean(acs3)))
                    acs1 = []
                    acs2 = []
                    acs3 = []

                    # acc1 = self.eval(source_val_loader, self.clf1)
                    # print("        | Val data source classifier1 acc=%.2f" % acc1)
                    # acc2 = self.eval(source_val_loader, self.clf2)
                    # print("        | Val data source classifier2 acc=%.2f" % acc2)
                    print()

    def save_models(self):
        torch.save(self.encoder, 'encoder.pth')
        torch.save(self.clf1, 'clf1.pth')
        torch.save(self.clf2, 'clf2.pth')
        torch.save(self.clf_target, 'clf_target.pth')

    def load_models(self):
        self.encoder = torch.load('encoder.pth')
        self.clf1 = torch.load('clf1.pth')
        self.clf2 = torch.load('clf2.pth')
        self.clf_target = torch.load('clf_target.pth')

    def eval(self, loader, classifier):
        """
        Evaluate encoder + passed classifier
        """
        # for x, y_true in loader:
        #     y_hat = classifier(self.encoder)
        #     acc = accuracy(y_hat, y_true)

        class_correct = [0] * self.num_classes
        class_total = [0.] * self.num_classes
        classes = shl_processing.coarse_label_mapping
        self.encoder.eval()
        classifier.eval()

        for x, y_true in loader:
            # forward pass: compute predicted outputs by passing inputs to the model
            x, y_true = self.to_var(x), self.to_var(y_true).long().squeeze()

            y_hat = classifier(self.encoder(x))
            _, pred = torch.max(y_hat, 1)
            correct = np.squeeze(pred.eq(y_true.data.view_as(pred)))
            # calculate test accuracy for each object class
            for i in range(len(y_true.data)):
                label = y_true.data[i]
                class_correct[label] += correct[i].item()
                class_total[label] += 1

        for i in range(self.num_classes):
            if class_total[i] > 0:
                print('\tTest Accuracy of %10s: %2d%% (%2d/%2d)' %
                      (classes[i], 100 * class_correct[i] / class_total[i],
                       np.sum(class_correct[i]), np.sum(class_total[i])))
            else:
                print('\tTest Accuracy of %10s: N/A (no training examples)' %
                      (classes[i]))

        self.encoder.train()
        classifier.train()

        return 100. * np.sum(class_correct) / np.sum(class_total)

    def wrap_to_loader(self, target_candidates, batch_size):
        """
        :param target_candidates: [(x,y_pseudo)]
        :return:
        """
        assert len(target_candidates) > 0
        tmp = target_candidates  # CondomDataset(target_candidates)
        return torch.utils.data.DataLoader(dataset=tmp,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=0)

    def confusion_matrix(self, loader, classifier):

        labels = []
        preds = []
        for x, y_true in loader:
            labels += list(y_true.cpu().detach().numpy().flatten())
            x, y_true = self.to_var(x), self.to_var(y_true).long().squeeze()

            y_hat = classifier(self.encoder(x))
            _, pred = torch.max(y_hat, 1)

            preds += list(pred.cpu().detach().numpy().flatten())

        cm = confusion_matrix(labels, preds)

        df_cm = pd.DataFrame(cm,
                             index=coarse_label_mapping,
                             columns=coarse_label_mapping,
                             dtype=int)
        plt.figure(figsize=(10, 7))
        sn.heatmap(df_cm, annot=True)

        plt.show()
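
A rough usage sketch for this Solver (the data loaders below are placeholder torch DataLoaders, not defined in the snippet):

solver = Solver()
solver.pretrain(source_loader, target_val_loader, pretrain_epochs=1)   # source-only warm-up of F, F1, F2, Ft
solver.train(source_loader, source_val_loader, target_loader,
             target_val_loader, epochs=9)                              # adaptation with pseudo-labelled target data
solver.save_models()
acc = solver.eval(target_val_loader, solver.clf_target)                # per-class and overall target accuracy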
コード例 #12
0
def recover_models(device,
                   model="supervised",
                   m=256,
                   n=4,
                   chann_type="AWGN",
                   verbose=False):
    """
    Function to try to recover an already saved system to a channel
    Args:
        device (string): Current device that we are working in
        model (string): Model that wish to be recovered. Options: supervised or alternated
        chann_type (string): Channel type. Currently only AWGN available
        n (int): Length of the encoded messages
        m (int): Total number of messages that can be encoded
    Returns:
        encoder/tx (Object): Recovered Tx/Encoder model
        decoder/rx (Object): Recovered Rx/Decoder model
    """
    try:
        if model == "supervised":
            enc_filename = "%s/%s_%d_%d_encoder.pth" % (MODELS_FOLDER,
                                                        chann_type, m, n)
            dec_filename = "%s/%s_%d_%d_decoder.pth" % (MODELS_FOLDER,
                                                        chann_type, m, n)

            encoder = Encoder(m=m, n=n)
            encoder.load_state_dict(torch.load(enc_filename))
            if verbose: print('Model loaded from %s.' % enc_filename)
            # Put them in the correct device and eval mode
            encoder.to(device)
            encoder.eval()

            decoder = Decoder(m=m, n=n)
            decoder.load_state_dict(torch.load(dec_filename))
            if verbose: print('Model loaded from %s.' % dec_filename)
            decoder.to(device)
            decoder.eval()

            return encoder, decoder
        else:
            tx_filename = "%s/%s_%d_%d_tx.pth" % (MODELS_FOLDER, chann_type, m,
                                                  n)
            rx_filename = "%s/%s_%d_%d_rx.pth" % (MODELS_FOLDER, chann_type, m,
                                                  n)

            tx = Transmitter(m=m, n=n)
            tx.load_state_dict(torch.load(tx_filename))
            if verbose: print('Model loaded from %s.' % tx_filename)
            # Put them in the correct device and eval mode
            tx.to(device)
            tx.eval()

            rx = Receiver(m=m, n=n)
            rx.load_state_dict(torch.load(rx_filename))
            if verbose: print('Model loaded from %s.' % rx_filename)
            rx.to(device)
            rx.eval()

            return tx, rx
    except Exception:
        raise NameError("Something went wrong loading file for system (%s)" %
                        (chann_type))
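
A short usage sketch for the loader above (MODELS_FOLDER and the saved .pth files are assumed to exist):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder, decoder = recover_models(device, model="supervised", m=256, n=4,
                                  chann_type="AWGN", verbose=True)
# the alternated variant returns the transmitter/receiver pair instead:
# tx, rx = recover_models(device, model="alternated", m=256, n=4)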
コード例 #13
0
def main():
    """
    Describe main process including train and validation.
    """

    global start_epoch, checkpoint, fine_tune_encoder, best_bleu4, epochs_since_improvement, word_map

    # Read word map
    word_map_path = os.path.join(data_folder,
                                 'WORDMAP_' + dataset_name + ".json")
    with open(word_map_path, 'r') as j:
        word_map = json.load(j)

    # Set checkpoint or read from checkpoint
    if checkpoint is None:  # No pretrained model, set model from beginning
        decoder = Decoder(embed_dim=embed_dim,
                          decoder_dim=decoder_dim,
                          vocab_size=len(word_map),
                          dropout=dropout_rate)
        decoder_param = filter(lambda p: p.requires_grad, decoder.parameters())
        for param in decoder_param:
            tensor0 = param.data
            dist.all_reduce(tensor0, op=dist.ReduceOp.SUM)
            param.data = tensor0 / np.sqrt(float(num_nodes))
        decoder_optimizer = optim.Adam(params=decoder_param, lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_param = filter(lambda p: p.requires_grad, encoder.parameters())
        if fine_tune_encoder:
            for param in encoder_param:
                tensor0 = param.data
                dist.all_reduce(tensor0, op=dist.ReduceOp.SUM)
                param.data = tensor0 / np.sqrt(float(num_nodes))
        encoder_optimizer = optim.Adam(
            params=encoder_param, lr=encoder_lr) if fine_tune_encoder else None
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        encoder = checkpoint['encoder']
        # Restore the optimizers too; they are needed further below when resuming
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    decoder = decoder.to(device)
    encoder = encoder.to(device)
    criterion = nn.CrossEntropyLoss()

    # Data loader
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_set = CaptionDataset(data_folder=h5data_folder,
                               data_name=dataset_name,
                               split="TRAIN",
                               transform=transforms.Compose([normalize]))
    val_set = CaptionDataset(data_folder=h5data_folder,
                             data_name=dataset_name,
                             split="VAL",
                             transform=transforms.Compose([normalize]))
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=workers,
                              pin_memory=True)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=workers,
                            pin_memory=True)

    total_start_time = datetime.datetime.now()
    print("Start the 1st epoch at: ", total_start_time)

    # Epoch
    for epoch in range(start_epoch, num_epochs):
        # Pre-check by epochs_since_improvement
        if epochs_since_improvement == 20:  # Stop if no improvement for 20 consecutive epochs
            break
        if epochs_since_improvement % 8 == 0 and epochs_since_improvement > 0:
            adjust_learning_rate(decoder_optimizer)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer)

        # For every batch
        batch_time = AverageMeter()  # forward prop. + back prop. time
        data_time = AverageMeter()  # data loading time
        losses = AverageMeter()  # loss (per word decoded)
        top5accs = AverageMeter()  # top5 accuracy
        decoder.train()
        encoder.train()

        start = time.time()
        start_time = datetime.datetime.now(
        )  # Initialize start time for this epoch

        # TRAIN
        for j, (images, captions, caplens) in enumerate(train_loader):
            if fine_tune_encoder and (epoch - start_epoch > 0 or j > 10):
                for group in encoder_optimizer.param_groups:
                    for p in group['params']:
                        state = encoder_optimizer.state[p]
                        if (state['step'] >= 1024):
                            state['step'] = 1000

            if (epoch - start_epoch > 0 or j > 10):
                for group in decoder_optimizer.param_groups:
                    for p in group['params']:
                        state = decoder_optimizer.state[p]
                        if (state['step'] >= 1024):
                            state['step'] = 1000

            data_time.update(time.time() - start)

            images = images.to(device)
            captions = captions.to(device)
            caplens = caplens.to(device)
            # Forward
            enc_images = encoder(images)
            predictions, enc_captions, dec_lengths, sort_ind = decoder(
                enc_images, captions, caplens)

            # Define target as original captions excluding <start>
            target = enc_captions[:, 1:]  # (batch_size, max_caption_length-1)
            target, _ = pack_padded_sequence(
                target, dec_lengths, batch_first=True
            )  # Delete all paddings and concat all other parts
            predictions, _ = pack_padded_sequence(
                predictions, dec_lengths,
                batch_first=True)  # (batch_size, sum(dec_lengths))

            loss = criterion(predictions, target)

            # Backward
            decoder_optimizer.zero_grad()
            if encoder_optimizer is not None:
                encoder_optimizer.zero_grad()
            loss.backward()
            ## Clip gradients
            if grad_clip is not None:
                clip_gradient(decoder_optimizer, grad_clip)
                if encoder_optimizer is not None:
                    clip_gradient(encoder_optimizer, grad_clip)
            ## Update
            decoder_optimizer.step()
            if encoder_optimizer is not None:
                encoder_optimizer.step()

            # Update metrics (AverageMeter)
            acc_top5 = compute_accuracy(predictions, target, k=5)
            top5accs.update(acc_top5, sum(dec_lengths))
            losses.update(loss.item(), sum(dec_lengths))
            batch_time.update(time.time() - start)

            # Print current status
            if (j + 1) % print_freq == 0:
                print(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Current Batch Time: {batch_time.val:.3f} (Average: {batch_time.avg:.3f})\t'
                    'Current Data Load Time: {data_time.val:.3f} (Average: {data_time.avg:.3f})\t'
                    'Current Loss: {loss.val:.4f} (Average: {loss.avg:.4f})\t'
                    'Current Top-5 Accuracy: {top5.val:.3f} (Average: {top5.avg:.3f})'
                    .format(epoch + 1,
                            j + 1,
                            len(train_loader),
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses,
                            top5=top5accs))
                now_time = datetime.datetime.now()
                print("Epoch Training Time: ", now_time - start_time)
                print("Total Time: ", now_time - total_start_time)

            start = time.time()

        # VALIDATION
        decoder.eval()
        encoder.eval()

        batch_time = AverageMeter()  # forward prop. + back prop. time
        losses = AverageMeter()  # loss (per word decoded)
        top5accs = AverageMeter()  # top5 accuracy
        references = list(
        )  # references (true captions) for calculating BLEU-4 score
        hypotheses = list()  # hypotheses (predictions)

        start_time = datetime.datetime.now()

        for j, (images, captions, caplens, all_caps) in enumerate(val_loader):
            start = time.time()

            images = images.to(device)
            captions = captions.to(device)
            caplens = caplens.to(device)

            # Forward
            enc_images = encoder(images)
            predictions, enc_captions, dec_lengths, sort_ind = decoder(
                enc_images, captions, caplens)

            # Define target as original captions excluding <start>
            predictions_copy = predictions.clone()
            target = enc_captions[:, 1:]  # (batch_size, max_caption_length-1)
            target, _ = pack_padded_sequence(
                target, dec_lengths, batch_first=True
            )  # Delete all paddings and concat all other parts
            predictions, _ = pack_padded_sequence(
                predictions, dec_lengths,
                batch_first=True)  # (batch_size, sum(dec_lengths))

            loss = criterion(predictions, target)

            # Update metrics (AverageMeter)
            acc_top5 = compute_accuracy(predictions, target, k=5)
            top5accs.update(acc_top5, sum(dec_lengths))
            losses.update(loss.item(), sum(dec_lengths))
            batch_time.update(time.time() - start)

            # Print current status
            if (j + 1) % print_freq == 0:
                print(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data Load Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})'.format(
                        epoch + 1,
                        j,
                        len(val_loader),
                        batch_time=batch_time,
                        data_time=data_time,
                        loss=losses,
                        top5=top5accs))
                now_time = datetime.datetime.now()
                print("Epoch Validation Time: ", now_time - start_time)
                print("Total Time: ", now_time - total_start_time)

            ## Store references (true captions), and hypothesis (prediction) for each image
            ## If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
            ## references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]

            # references
            all_caps = all_caps[sort_ind]
            for k in range(all_caps.shape[0]):
                img_caps = all_caps[k].tolist()
                img_captions = list(
                    map(
                        lambda c: [
                            w for w in c if w not in
                            {word_map["<start>"], word_map["<pad>"]}
                        ], img_caps))
                references.append(img_captions)

            # hypotheses
            _, preds = torch.max(predictions_copy, dim=2)
            preds = preds.tolist()
            temp_preds = list()
            for i, p in enumerate(preds):
                temp_preds.append(preds[i][:dec_lengths[i]])  # remove pads
            preds = temp_preds
            hypotheses.extend(preds)

            assert len(references) == len(hypotheses)

        ## Compute BLEU-4 Scores
        #recent_bleu4 = corpus_bleu(references, hypotheses, emulate_multibleu=True)
        recent_bleu4 = corpus_bleu(references, hypotheses)

        print(
            '\n * LOSS - {loss.avg:.3f}, TOP-5 ACCURACY - {top5.avg:.3f}, BLEU-4 - {bleu}\n'
            .format(loss=losses, top5=top5accs, bleu=recent_bleu4))

        # CHECK IMPROVEMENT
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement))
        else:
            epochs_since_improvement = 0

        # SAVE CHECKPOINT
        save_checkpoint(dataset_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
        print("Epoch {}, cost time: {}\n".format(epoch + 1,
                                                 now_time - total_start_time))
コード例 #14
0
def make_representations(arguments, device):
    """
    Creates representations for all data.
    :param arguments: Dictionary containing arguments.
    :param device: PyTorch device object.
    """

    # Loads training and testing data.
    train_data = Dataset(arguments, "train")
    test_data = Dataset(arguments, "test")

    # Creates the data loaders for the training and testing data.
    training_data_loader = DataLoader(train_data,
                                      batch_size=arguments["batch_size"],
                                      shuffle=False,
                                      num_workers=arguments["data_workers"],
                                      pin_memory=False,
                                      drop_last=False)
    testing_data_loader = DataLoader(test_data,
                                     batch_size=arguments["batch_size"],
                                     shuffle=False,
                                     num_workers=arguments["data_workers"],
                                     pin_memory=False,
                                     drop_last=False)

    log(arguments, "Loaded Datasets")

    # Initialises the encoder.
    encoder = Encoder(0, arguments["image_size"],
                      arguments["pretrained"] == "imagenet")

    # Loads weights from pretrained Contrastive Predictive Coding model.
    if arguments["pretrained"].lower() == "cpc":
        encoder_path = os.path.join(
            arguments["model_dir"],
            f"{arguments['experiment']}_encoder_best.pt")
        encoder.load_state_dict(torch.load(encoder_path, map_location=device),
                                strict=False)

    # Sets the model to evaluation mode.
    encoder.eval()

    # Moves the model to the selected device.
    encoder.to(device)

    # If 16 bit precision is being used change the model and optimiser precision.
    if arguments["precision"] == 16:
        encoder = amp.initialize(encoder, opt_level="O2", verbosity=False)

    # Checks if precision level is supported and if not defaults to 32.
    elif arguments["precision"] != 32:
        log(
            arguments,
            "Only 16 and 32 bit precision supported. Defaulting to 32 bit precision."
        )

    log(arguments, "Models Initialised")

    # Creates a folder if one does not exist.
    os.makedirs(os.path.dirname(arguments["representation_dir"]),
                exist_ok=True)

    # Creates the HDF5 files used to store the training and testing data representations.
    train_representations = HDF5Handler(
        os.path.join(arguments["representation_dir"],
                     f"{arguments['experiment']}_train.h5"), 'x',
        (encoder.encoder_size, ))
    test_representations = HDF5Handler(
        os.path.join(arguments["representation_dir"],
                     f"{arguments['experiment']}_test.h5"), 'x',
        (encoder.encoder_size, ))

    log(arguments, "HDF5 Representation Files Created.")

    # Starts a timer.
    start_time = time.time()

    # Performs a representation generation with no gradients.
    with torch.no_grad():

        # Loops through the training data.
        num_batches = 0
        for images, _ in training_data_loader:

            # Loads the image batch into memory.
            images = images.to(device)

            # Gets the representations of the image batch from the encoder.
            representations = encoder.forward_features(images)

            # Moves the representations to the CPU.
            representations = representations.cpu().data.numpy()

            # Adds the batch representations to the HDF5 file.
            train_representations.append(representations)

            # Prints information about representation extraction process.
            num_batches += 1
            if num_batches % arguments["log_intervals"] == 0:
                print(
                    f"Training Batches: {num_batches}/{len(train_data) // arguments['batch_size']}"
                )

        # Loops through the testing data.
        num_batches = 0
        for images, _ in testing_data_loader:

            # Loads the image batch into memory.
            images = images.to(device)

            # Gets the representations of the image batch from the encoder.
            representations = encoder.forward_features(images)

            # Moves the representations to the CPU.
            representations = representations.cpu().data.numpy()

            # Adds the batch representations to the HDF5 file.
            test_representations.append(representations)

            # Prints information about representation extraction process.
            num_batches += 1
            if num_batches % arguments["log_intervals"] == 0:
                print(
                    f"Testing Batches: {num_batches}/{len(test_data) // arguments['batch_size']}"
                )

    print(
        f"Representations from {arguments['experiment']} encoder created in {int(time.time() - start_time)}s"
    )
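
For reference, the arguments dictionary consumed above touches the keys below; the values shown here are illustrative placeholders only:

arguments = {
    "batch_size": 64, "data_workers": 4, "image_size": 96,
    "pretrained": "cpc",                  # "imagenet", "cpc", or anything else for random init
    "model_dir": "models", "experiment": "cpc_run1",
    "precision": 32,                      # 16 requires NVIDIA apex/amp
    "representation_dir": "representations/",
    "log_intervals": 10,
}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
make_representations(arguments, device)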
コード例 #15
0
class Trainer:
    def __init__(self, device, dset, x_dim, c_dim, z_dim, n_train, n_test, lr,
                 layer_sizes, **kwargs):
        '''
        Trainer class
        Args:
            device (torch.device) : Use GPU or CPU
            x_dim (int)           : Feature dimension
            c_dim (int)           : Attribute dimension
            z_dim (int)           : Latent dimension
            n_train (int)         : Number of training classes
            n_test (int)          : Number of testing classes
            lr (float)            : Learning rate for VAE
            layer_sizes (dict)    : Dict of hidden layer sizes for each encoder/decoder
            **kwargs              : Flags for using various regularizations
        '''
        self.device = device
        self.dset = dset
        self.lr = lr
        self.z_dim = z_dim

        self.n_train = n_train
        self.n_test = n_test
        self.gzsl = kwargs.get('gzsl', False)
        if self.gzsl:
            self.n_test = n_train + n_test

        # flags for various regularizers
        self.use_da = kwargs.get('use_da', False)
        self.use_ca = kwargs.get('use_ca', False)
        self.use_support = kwargs.get('use_support', False)

        self.x_encoder = Encoder(x_dim, layer_sizes['x_enc'],
                                 z_dim).to(self.device)
        self.x_decoder = Decoder(z_dim, layer_sizes['x_dec'],
                                 x_dim).to(self.device)

        self.c_encoder = Encoder(c_dim, layer_sizes['c_enc'],
                                 z_dim).to(self.device)
        self.c_decoder = Decoder(z_dim, layer_sizes['c_dec'],
                                 c_dim).to(self.device)

        self.support_classifier = Classifier(z_dim,
                                             self.n_train).to(self.device)

        params = list(self.x_encoder.parameters()) + \
                 list(self.x_decoder.parameters()) + \
                 list(self.c_encoder.parameters()) + \
                 list(self.c_decoder.parameters())

        if self.use_support:
            params += list(self.support_classifier.parameters())

        self.optimizer = optim.Adam(params, lr=lr)

        self.final_classifier = Classifier(z_dim, self.n_test).to(self.device)
        self.final_cls_optim = optim.RMSprop(
            self.final_classifier.parameters(), lr=2e-4)
        self.criterion = nn.CrossEntropyLoss()

        self.vae_save_path = './saved_models'
        self.disc_save_path = './saved_models/disc_model_%s.pth' % self.dset

    def fit_VAE(self, x, c, y, ep):
        '''
        Train on 1 minibatch of data
        Args:
            x (torch.Tensor) : Features of size (batch_size, 2048)
            c (torch.Tensor) : Attributes of size (batch_size, attr_dim)
            y (torch.Tensor) : Target labels of size (batch_size,)
            ep (int)         : Epoch number
        Returns:
            Loss for the minibatch -
            3-tuple with (vae_loss, distribution-alignment loss, cross-reconstruction loss)
        '''
        self.anneal_parameters(ep)

        x = Variable(x.float()).to(self.device)
        c = Variable(c.float()).to(self.device)
        y = Variable(y.long()).to(self.device)

        # VAE for image embeddings
        mu_x, logvar_x = self.x_encoder(x)
        z_x = self.reparameterize(mu_x, logvar_x)
        x_recon = self.x_decoder(z_x)

        # VAE for class embeddings
        mu_c, logvar_c = self.c_encoder(c)
        z_c = self.reparameterize(mu_c, logvar_c)
        c_recon = self.c_decoder(z_c)

        # reconstruction loss
        L_recon_x = self.compute_recon_loss(x, x_recon)
        L_recon_c = self.compute_recon_loss(c, c_recon)

        # KL divergence loss
        D_kl_x = self.compute_kl_div(mu_x, logvar_x)
        D_kl_c = self.compute_kl_div(mu_c, logvar_c)

        # VAE Loss = recon_loss - KL_Divergence_loss
        L_vae_x = L_recon_x - self.beta * D_kl_x
        L_vae_c = L_recon_c - self.beta * D_kl_c
        L_vae = L_vae_x + L_vae_c

        # calculate cross alignment loss
        L_ca = torch.zeros(1).to(self.device)
        if self.use_ca:
            x_recon_from_c = self.x_decoder(z_c)
            L_ca_x = self.compute_recon_loss(x, x_recon_from_c)

            c_recon_from_x = self.c_decoder(z_x)
            L_ca_c = self.compute_recon_loss(c, c_recon_from_x)

            L_ca = L_ca_x + L_ca_c

        # calculate distribution alignment loss
        L_da = torch.zeros(1).to(self.device)
        if self.use_da:
            L_da = 2 * self.compute_da_loss(mu_x, logvar_x, mu_c, logvar_c)

        # calculate loss from support classifier
        L_sup = torch.zeros(1).to(self.device)
        if self.use_support:
            y_prob = F.softmax(self.support_classifier(z_x), dim=0)
            log_prob = torch.log(torch.gather(y_prob, 1, y.unsqueeze(1)))
            L_sup = -1 * torch.mean(log_prob)

        total_loss = L_vae + self.gamma * L_ca + self.delta * L_da + self.alpha * L_sup

        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()

        return L_vae.item(), L_da.item(), L_ca.item()

    def reparameterize(self, mu, log_var):
        '''
        Reparameterization trick using unimodal gaussian
        '''
        # eps = Variable(torch.randn(mu.size())).to(self.device)
        eps = Variable(torch.randn(mu.size()[0],
                                   1).expand(mu.size())).to(self.device)
        z = mu + torch.exp(log_var / 2.0) * eps
        return z

    def anneal_parameters(self, epoch):
        '''
        Change weight factors of various losses based on epoch number
        '''
        # weight of kl divergence loss
        if epoch <= 90:
            self.beta = 0.0026 * epoch

        # weight of Cross Alignment loss
        if epoch < 20:
            self.gamma = 0
        if epoch >= 20 and epoch <= 75:
            self.gamma = 0.044 * (epoch - 20)

        # weight of distribution alignment loss
        if epoch < 5:
            self.delta = 0
        if epoch >= 5 and epoch <= 22:
            self.delta = 0.54 * (epoch - 5)

        # weight of support loss
        if epoch < 5:
            self.alpha = 0
        else:
            self.alpha = 0.01

    def compute_recon_loss(self, x, x_recon):
        '''
        Compute the reconstruction error (summed L1 distance).
        '''
        l1_loss = torch.abs(x - x_recon).sum()
        # l1_loss = torch.abs(x - x_recon).sum(dim=1).mean()
        return l1_loss

    def compute_kl_div(self, mu, log_var):
        '''
        Compute the negative of the KL divergence between N(mu, var) and
        N(0, 1); it is subtracted from the reconstruction loss in fit_VAE.
        '''
        kld = 0.5 * (1 + log_var - mu.pow(2) - log_var.exp()).sum()
        # kld = 0.5 * (1 + log_var - mu.pow(2) - log_var.exp()).sum(dim=1).mean()
        return kld
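
    # Sanity-check sketch (not part of the original code): the closed form above
    # is the negative of the analytic KL divergence, which torch.distributions
    # can confirm:
    #
    #     from torch.distributions import Normal, kl_divergence
    #     std = (log_var / 2.0).exp()
    #     kl = kl_divergence(Normal(mu, std),
    #                        Normal(torch.zeros_like(mu),
    #                               torch.ones_like(std))).sum()
    #     # kl should equal -self.compute_kl_div(mu, log_var)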

    def compute_da_loss(self, mu1, log_var1, mu2, log_var2):
        '''
        Compute the distribution alignment loss between two diagonal Gaussian
        distributions, using the 2-Wasserstein distance as the distance measure.
        '''
        l1 = (mu1 - mu2).pow(2).sum(dim=1)

        std1 = (log_var1 / 2.0).exp()
        std2 = (log_var2 / 2.0).exp()
        l2 = (std1 - std2).pow(2).sum(dim=1)

        l_da = torch.sqrt(l1 + l2).sum()
        return l_da
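
    # For diagonal Gaussians, l1 + l2 above is the closed-form squared
    # 2-Wasserstein distance:
    #     W2(N(mu1, diag(s1^2)), N(mu2, diag(s2^2)))^2
    #         = ||mu1 - mu2||^2 + ||s1 - s2||^2
    # so l_da sums the per-sample Wasserstein distances over the batch.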

    def fit_final_classifier(self, x, y):
        '''
        Train the final classifier on synthetically generated data
        '''
        x = Variable(x.float()).to(self.device)
        y = Variable(y.long()).to(self.device)

        logits = self.final_classifier(x)
        loss = self.criterion(logits, y)

        self.final_cls_optim.zero_grad()
        loss.backward()
        self.final_cls_optim.step()

        return loss.item()

    def fit_MOE(self, x, y):
        '''
        Train a Mixture-of-Experts (MoE) model on the synthetic dataset.
        Placeholder: not yet implemented.
        '''

    def get_vae_savename(self):
        '''
        Returns a string encoding the dataset and the flags used during
        training; it serves as a unique filename for saving the model.
        '''
        flags = ''
        if self.use_da:
            flags += '-da'
        if self.use_ca:
            flags += '-ca'
        if self.use_support:
            flags += '-support'
        model_name = 'vae_model__dset-%s__lr-%f__z-%d__%s.pth' % (
            self.dset, self.lr, self.z_dim, flags)
        return model_name

    def save_VAE(self, ep):
        state = {
            'epoch': ep,
            'x_encoder': self.x_encoder.state_dict(),
            'x_decoder': self.x_decoder.state_dict(),
            'c_encoder': self.c_encoder.state_dict(),
            'c_decoder': self.c_decoder.state_dict(),
            'optimizer': self.optimizer.state_dict(),
        }
        model_name = self.get_vae_savename()
        torch.save(state, os.path.join(self.vae_save_path, model_name))

    def load_models(self, model_path=''):
        if model_path == '':
            model_path = os.path.join(self.vae_save_path,
                                      self.get_vae_savename())

        ep = 0
        if os.path.exists(model_path):
            checkpoint = torch.load(model_path)
            self.x_encoder.load_state_dict(checkpoint['x_encoder'])
            self.x_decoder.load_state_dict(checkpoint['x_decoder'])
            self.c_encoder.load_state_dict(checkpoint['c_encoder'])
            self.c_decoder.load_state_dict(checkpoint['c_decoder'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            ep = checkpoint['epoch']

        return ep

    def create_syn_dataset(self,
                           test_labels,
                           attributes,
                           seen_dataset,
                           n_samples=400):
        '''
        Create a synthetic dataset based on the attribute vectors of the unseen
        classes.
        Args:
            test_labels: A dict mapping each class's original serial number in
                the provided dataset to the index predicted for it by the
                network during classification
            attributes: A numpy array of per-class attribute vectors
            seen_dataset: A list of 3-tuples (x, attr_idx, y) where x belongs
                to a seen class and y is the corresponding label; used to
                generate latent representations of seen classes in GZSL
                (may be None)
            n_samples: Number of samples to generate per unseen class
                (default: 400)
        Returns:
            A list of 3-tuples (z, attr_idx, y) where z is a latent
            representation and y is the corresponding label
        '''
        syn_dataset = []
        for test_cls, idx in test_labels.items():
            attr = attributes[test_cls - 1]

            self.c_encoder.eval()
            c = Variable(torch.FloatTensor(attr).unsqueeze(0)).to(self.device)
            mu, log_var = self.c_encoder(c)

            Z = torch.cat(
                [self.reparameterize(mu, log_var) for _ in range(n_samples)])

            syn_dataset.extend([(Z[i], test_cls, idx)
                                for i in range(n_samples)])

        if seen_dataset is not None:
            self.x_encoder.eval()
            for (x, att_idx, y) in seen_dataset:
                x = Variable(torch.FloatTensor(x).unsqueeze(0)).to(self.device)
                mu, log_var = self.x_encoder(x)
                z = self.reparameterize(mu, log_var).squeeze()
                syn_dataset.append((z, att_idx, y))

        return syn_dataset
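
    # A minimal usage sketch (the `trainer` name is illustrative, not part of
    # this class): the synthetic dataset is typically wrapped in a DataLoader
    # and fed batch-by-batch to fit_final_classifier.
    #
    #     loader = torch.utils.data.DataLoader(syn_dataset, batch_size=128,
    #                                          shuffle=True)
    #     for z, _, y in loader:
    #         trainer.fit_final_classifier(z, y)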

    def compute_accuracy(self, generator):
        y_real_list, y_pred_list = [], []

        for idx, (x, _, y) in enumerate(generator):
            x = Variable(x.float()).to(self.device)
            y = Variable(y.long()).to(self.device)

            self.final_classifier.eval()
            self.x_encoder.eval()
            mu, log_var = self.x_encoder(x)
            logits = self.final_classifier(mu)

            _, y_pred = logits.max(dim=1)

            y_real = y.detach().cpu().numpy()
            y_pred = y_pred.detach().cpu().numpy()

            y_real_list.extend(y_real)
            y_pred_list.extend(y_pred)

        ## We now have the full sequences of true and predicted labels;
        ## compute seen-class and unseen-class accuracies (separately for GZSL)

        if self.gzsl:
            y_real_list = np.asarray(y_real_list)
            y_pred_list = np.asarray(y_pred_list)

            y_seen_real = np.extract(y_real_list < self.n_train, y_real_list)
            y_seen_pred = np.extract(y_real_list < self.n_train, y_pred_list)

            y_unseen_real = np.extract(y_real_list >= self.n_train,
                                       y_real_list)
            y_unseen_pred = np.extract(y_real_list >= self.n_train,
                                       y_pred_list)

            acc_seen = accuracy_score(y_seen_real, y_seen_pred)
            acc_unseen = accuracy_score(y_unseen_real, y_unseen_pred)

            return acc_seen, acc_unseen

        else:
            return accuracy_score(y_real_list, y_pred_list)
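
# In the GZSL setting, the seen and unseen accuracies returned above are usually
# combined into their harmonic mean. A minimal sketch; the helper name
# `harmonic_mean` is illustrative and not part of the original code:

def harmonic_mean(acc_seen, acc_unseen):
    # H = 2 * S * U / (S + U), with 0 when both accuracies are 0
    if acc_seen + acc_unseen == 0:
        return 0.0
    return 2 * acc_seen * acc_unseen / (acc_seen + acc_unseen)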
コード例 #16
0
def main(args):

	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	print(f"Evaluating on {device}")

	with open(args.vocab_path, 'rb') as f:
		vocab_object = pickle.load(f)
	print(f"Loaded the vocabulary object from {args.vocab_path}, total size={len(vocab_object)}")

	if args.glove_embed_path is not None:
		with open(args.glove_embed_path, 'rb') as f:
			glove_embeddings = pickle.load(f)
		print(f"Loaded the glove embeddings from {args.glove_embed_path}, total size={len(glove_embeddings)}")

		# We are using 300d glove embeddings
		args.embed_size = 300

		weights_matrix = np.zeros((len(vocab_object), args.embed_size))

		for word, index in vocab_object.word2index.items():
			if word in glove_embeddings:
				weights_matrix[index] = glove_embeddings[word]
			else:
				weights_matrix[index] = np.random.normal(scale=0.6, size=(args.embed_size, ))

		weights_matrix = torch.from_numpy(weights_matrix).float().to(device)

	else:
		weights_matrix = None


	img_transforms = transforms.Compose([
		                transforms.Resize((224, 224)),
		                transforms.ToTensor(),
		                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
	                ])

	val_dataset = ImageDataset(args.image_root, img_transforms)

	val_dataloader = torch.utils.data.DataLoader(
		dataset=val_dataset, 
		batch_size=args.batch_size, 
		shuffle=False, 
		num_workers=args.num_workers)

	encoder = Encoder(args.resnet_size, (3, 224, 224), args.embed_size)
	encoder = encoder.eval().to(device)
	decoder = Decoder(args.rnn_type, weights_matrix, len(vocab_object), args.embed_size, args.hidden_size)
	decoder = decoder.eval().to(device)
	
	model_ckpt = torch.load(args.eval_ckpt_path, map_location=lambda storage, loc: storage)
	encoder.load_state_dict(model_ckpt['encoder'])
	decoder.load_state_dict(model_ckpt['decoder'])
	print(f"Loaded model from {args.eval_ckpt_path}")

	val_results = []

	total_examples = len(val_dataloader)
	for i, (images, image_ids) in enumerate(val_dataloader):

		images = images.to(device)

		with torch.no_grad():
			image_embeddings = encoder(images)
			captions_wid = decoder.sample_batch(image_embeddings, args.caption_maxlen)

		captions_wid = captions_wid.cpu().numpy()
		captions = []
		for caption_wid in captions_wid:
			caption_words = []
			for word_id in caption_wid:
				word = vocab_object.index2word[word_id]
				caption_words.append(word)
				if word == '<end>':
					break
			captions.append(' '.join(caption_words[1:-2]))

		image_ids = image_ids.tolist()
		for image_id, caption in zip(image_ids, captions):
			val_results.append({'image_id': image_id, 'caption': caption})

	with open(args.results_json_path,'w') as f:
		json.dump(val_results, f)
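
# A minimal sketch of how the GloVe `weights_matrix` built above is commonly
# turned into an embedding layer. Whether the Decoder used here does exactly
# this internally is an assumption, since its implementation is not shown:

import torch.nn as nn

def build_embedding(weights_matrix, trainable=True):
    # weights_matrix: FloatTensor of shape (vocab_size, embed_size)
    return nn.Embedding.from_pretrained(weights_matrix, freeze=not trainable)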
コード例 #17
0
def main():

    start_epoch = 0
    max_loss = math.inf
    epochs_since_improvement = 0

    dataset = GaitSequenceDataset(root_dir=data_dir,
                                  longest_sequence=85,
                                  shortest_sequence=55)

    train_sampler, validation_sampler = generate_train_validation_samplers(dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, drop_last=True)
    validation_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=validation_sampler, drop_last=True)

    if load_pretrained is True:
        print('Loading pretrained model..')
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        encoder = checkpoint['encoder']
        decoder = checkpoint['decoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        decoder_optimizer = checkpoint['decoder_optimizer']

    else:
        print('Creating model..')
        encoder = Encoder(sequence_length, num_features, embedding_dimension)
        decoder = Decoder(embedding_dimension, num_classes, hidden_dimension, sequence_length)
        encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

    criterion = nn.MSELoss().to(device)

    if mode == 'train':

        summary = SummaryWriter()
        #summary = None

        encoder.to(device)
        decoder.to(device)

        for epoch in range(start_epoch, start_epoch+num_epochs):

            if epochs_since_improvement == 20:
                break

            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(encoder_optimizer, 0.8)

            train(encoder, decoder, train_dataloader, encoder_optimizer, decoder_optimizer, criterion, 
                    clip_gradient, device, epoch, num_epochs, summary, loss_display_interval)

            current_loss = validate(encoder, decoder, validation_dataloader, criterion, device, epoch, num_epochs, 
                                summary, loss_display_interval)

            is_best = max_loss > current_loss
            max_loss = min(max_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, encoder, decoder, encoder_optimizer, decoder_optimizer, is_best)

    else:
        print('testing...')
        encoder.to(device)
        decoder.to(device)
        encoder.eval()
        decoder.eval()
        for batch_idx, data in enumerate(validation_dataloader):
            sequence = data['sequence'][0].unsqueeze(0).permute(1, 0, 2).to(device)
            seq_len = data['sequence_length'][0]
            x, (hidden_state, cell_state) = encoder(sequence)
            prediction = decoder(hidden_state)
            
            sequence = sequence.squeeze(1).detach().cpu().numpy()
            prediction = prediction.squeeze(1).detach().cpu().numpy()

            print(sequence.shape)
            hip_angles_gt = sequence[:seq_len, [0,3]]
            knee_angles_gt = sequence[:seq_len, [1,4]]
            ankle_angles_gt = sequence[:seq_len, [2,5]]

            hip_angles_pred = prediction[:seq_len, [0,3]]
            knee_angles_pred = prediction[:seq_len, [1,4]]
            ankle_angles_pred = prediction[:seq_len, [2,5]]

            time = np.arange(0, len(hip_angles_gt), 1)
            
            fig, axs = plt.subplots(4)
            # fig.suptitle('Hip angle reconstruction')
            # axs[0].plot(time, hip_angles_gt[:,0])
            # axs[0].set_title('Left hip ground truth')
            # axs[1].plot(time, hip_angles_pred[:,0])
            # axs[1].set_title('Left hip prediction')
            # axs[2].plot(time, hip_angles_gt[:,1])
            # axs[2].set_title('Right hip ground truth')
            # axs[3].plot(time, hip_angles_pred[:,1])
            # axs[3].set_title('Right hip prediction')

            # fig.suptitle('Knee angle reconstruction')
            # axs[0].plot(time, knee_angles_gt[:,0])
            # axs[0].set_title('Left knee ground truth')
            # axs[1].plot(time, knee_angles_pred[:,0])
            # axs[1].set_title('Left knee prediction')
            # axs[2].plot(time, knee_angles_gt[:,1])
            # axs[2].set_title('Right knee ground truth')
            # axs[3].plot(time, knee_angles_pred[:,1])
            # axs[3].set_title('Right knee prediction')

            fig.suptitle('Ankle angle reconstruction')
            axs[0].plot(time, ankle_angles_gt[:,0])
            axs[0].set_title('Left ankle ground truth')
            axs[1].plot(time, ankle_angles_pred[:,0])
            axs[1].set_title('Left ankle prediction')
            axs[2].plot(time, ankle_angles_gt[:,1])
            axs[2].set_title('Right ankle ground truth')
            axs[3].plot(time, ankle_angles_pred[:,1])
            axs[3].set_title('Right ankle prediction')

            plt.show()

            break
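
# The manual plateau handling above (count epochs_since_improvement, scale the
# learning rate every 4 stagnant epochs) can also be expressed with PyTorch's
# built-in scheduler. A minimal sketch under that assumption:

from torch.optim.lr_scheduler import ReduceLROnPlateau

def make_plateau_scheduler(optimizer):
    # Multiply the LR by 0.8 after 4 epochs without improvement, mirroring
    # adjust_learning_rate(encoder_optimizer, 0.8) in the loop above.
    return ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=4)

# usage (once per epoch, after validation): scheduler.step(current_loss)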
コード例 #18
0
class AAETrainer(AbstractTrainer):
    def __init__(self, opt):
        super().__init__(opt)

        print('[info] Dataset:', self.opt.dataset)
        print('[info] Alpha = ', self.opt.alpha)
        print('[info] Latent dimension = ', self.opt.latent_dim)

        self.opt = opt
        self.start_visdom()

    def start_visdom(self):
        self.vis = utils.Visualizer(env='Adversarial AutoEncoder Training',
                                    port=8888)

    def build_network(self):
        print('[info] Build the network architecture')
        self.encoder = Encoder(z_dim=self.opt.latent_dim)
        if self.opt.dataset == 'SMPL':
            num_verts = 6890
        elif self.opt.dataset == 'all_animals':
            num_verts = 3889
        else:
            raise ValueError('Unsupported dataset: %s' % self.opt.dataset)
        self.decoder = Decoder(num_verts=num_verts, z_dim=self.opt.latent_dim)
        self.discriminator = Discriminator(input_dim=self.opt.latent_dim)

        self.encoder.cuda()
        self.decoder.cuda()
        self.discriminator.cuda()

    def build_optimizer(self):
        print('[info] Build the optimizer')
        self.optim_dis = optim.SGD(self.discriminator.parameters(),
                                   lr=self.opt.learning_rate)
        self.optim_AE = optim.Adam(itertools.chain(self.encoder.parameters(),
                                                   self.decoder.parameters()),
                                   lr=self.opt.learning_rate)

    def build_dataset_train(self):
        train_data = ACAPData(mode='train', name=self.opt.dataset)
        self.num_train_data = len(train_data)
        print('[info] Number of training samples = ', self.num_train_data)
        self.train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=self.opt.batch_size, shuffle=True)

    def build_dataset_valid(self):
        valid_data = ACAPData(mode='valid', name=self.opt.dataset)
        self.num_valid_data = len(valid_data)
        print('[info] Number of validation samples = ', self.num_valid_data)
        self.valid_loader = torch.utils.data.DataLoader(valid_data,
                                                        batch_size=128,
                                                        shuffle=True)

    def build_losses(self):
        print('[info] Build the loss functions')
        self.mseLoss = torch.nn.MSELoss()
        self.ganLoss = torch.nn.BCELoss()

    def print_iteration_stats(self):
        """
        print stats at each iteration
        """
        print(
            '\r[Epoch %d] [Iteration %d/%d] enc = %f dis = %f rec = %f' %
            (self.epoch, self.iteration,
             int(self.num_train_data / self.opt.batch_size),
             self.enc_loss.item(), self.dis_loss.item(), self.rec_loss.item()),
            end='')

    def train_iteration(self):

        self.encoder.train()
        self.decoder.train()
        self.discriminator.train()

        x = self.data.cuda()

        z = self.encoder(x)
        ''' Discriminator '''
        # sample from N(0, I)
        z_real = Variable(torch.randn(z.size(0), z.size(1))).cuda()

        y_real = Variable(torch.ones(z.size(0))).cuda()
        dis_real_loss = self.ganLoss(
            self.discriminator(z_real).view(-1), y_real)

        y_fake = Variable(torch.zeros(z.size(0))).cuda()
        dis_fake_loss = self.ganLoss(self.discriminator(z).view(-1), y_fake)

        self.optim_dis.zero_grad()
        self.dis_loss = 0.5 * (dis_fake_loss + dis_real_loss)
        self.dis_loss.backward(retain_graph=True)
        self.optim_dis.step()
        self.dis_losses.append(self.dis_loss.item())
        ''' Autoencoder '''
        # The encoder tries to generate latent vectors that are close to the prior.
        y_real = Variable(torch.ones(z.size(0))).cuda()
        self.enc_loss = self.ganLoss(self.discriminator(z).view(-1), y_real)

        # The decoder tries to make the reconstruction as similar to the input as possible.
        rec = self.decoder(z)
        self.rec_loss = self.mseLoss(rec, x)

        # There is a trade-off here:
        # Latent regularization V.S. Reconstruction quality
        self.EG_loss = self.opt.alpha * self.enc_loss + (
            1 - self.opt.alpha) * self.rec_loss

        self.optim_AE.zero_grad()
        self.EG_loss.backward()
        self.optim_AE.step()

        self.enc_losses.append(self.enc_loss.item())
        self.rec_losses.append(self.rec_loss.item())

        self.print_iteration_stats()
        self.increment_iteration()

    def train_epoch(self):

        self.reset_iteration()
        self.dis_losses = []
        self.enc_losses = []
        self.rec_losses = []
        for step, data in enumerate(self.train_loader):
            self.data = data
            self.train_iteration()

        self.dis_losses = torch.Tensor(self.dis_losses)
        self.dis_losses = torch.mean(self.dis_losses)

        self.enc_losses = torch.Tensor(self.enc_losses)
        self.enc_losses = torch.mean(self.enc_losses)

        self.rec_losses = torch.Tensor(self.rec_losses)
        self.rec_losses = torch.mean(self.rec_losses)

        self.vis.draw_line(win='Encoder Loss', x=self.epoch, y=self.enc_losses)
        self.vis.draw_line(win='Discriminator Loss',
                           x=self.epoch,
                           y=self.dis_losses)
        self.vis.draw_line(win='Reconstruction Loss',
                           x=self.epoch,
                           y=self.rec_losses)

    def valid_iteration(self):

        self.encoder.eval()
        self.decoder.eval()
        self.discriminator.eval()

        x = self.data.cuda()
        z = self.encoder(x)
        recon = self.decoder(z)

        # loss
        rec_loss = self.mseLoss(recon, x)
        self.rec_loss.append(rec_loss.item())
        self.increment_iteration()

    def valid_epoch(self):
        self.reset_iteration()
        self.rec_loss = []
        for step, data in enumerate(self.valid_loader):
            self.data = data
            self.valid_iteration()

        self.rec_loss = torch.Tensor(self.rec_loss)
        self.rec_loss = torch.mean(self.rec_loss)
        self.vis.draw_line(win='Valid reconstruction loss',
                           x=self.epoch,
                           y=self.rec_loss)

    def save_network(self):
        print("\n[info] saving net...")
        torch.save(self.encoder.state_dict(),
                   f"{self.opt.save_path}/Encoder.pth")
        torch.save(self.decoder.state_dict(),
                   f"{self.opt.save_path}/Decoder.pth")
        torch.save(self.discriminator.state_dict(),
                   f"{self.opt.save_path}/Discriminator.pth")
コード例 #19
0
def main():
    start_epoch = 0
    max_loss = math.inf
    epochs_since_improvement = 0

    # Creating custom dataset
    dataset = GaitSequenceDataset(root_dir=data_dir,
                                  longest_sequence=85,
                                  shortest_sequence=55)

    # Samplers for training and validation dataloaders
    train_sampler, validation_sampler = generate_train_validation_samplers(
        dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = torch.utils.data.DataLoader(dataset,
                                                   batch_size=batch_size,
                                                   sampler=train_sampler,
                                                   drop_last=True)
    validation_dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=validation_sampler,
        drop_last=True)

    # Loading a pretrained model
    if load_pretrained is True:
        print('Loading pretrained model..')
        checkpoint = torch.load(best_checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        encoder = checkpoint['encoder']
        decoder = checkpoint['decoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        decoder_optimizer = checkpoint['decoder_optimizer']

    else:
        print('Creating model..')
        encoder = Encoder(sequence_length, num_features, embedding_dimension)
        decoder = Decoder(embedding_dimension, num_features, hidden_dimension,
                          sequence_length)
        encoder_optimizer = torch.optim.RMSprop(encoder.parameters(),
                                                lr=learning_rate)
        decoder_optimizer = torch.optim.RMSprop(decoder.parameters(),
                                                lr=learning_rate)

    # Mean Squared Loss
    criterion = nn.MSELoss().to(device)

    if mode == 'train':

        # Using summary writer for logging
        summary = SummaryWriter()

        encoder.to(device)
        decoder.to(device)

        for epoch in range(start_epoch, start_epoch + num_epochs):

            # Early stopping if the model has not improved for 10 consecutive epochs
            if epochs_since_improvement == 10:
                break

            # Scale the learning rate by 0.8 after every 4 consecutive epochs without improvement
            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(encoder_optimizer, 0.8)

            # Train
            train(encoder, decoder, train_dataloader, encoder_optimizer,
                  decoder_optimizer, criterion, clip_gradient, device, epoch,
                  num_epochs, summary, loss_display_interval)

            # Validate
            current_loss = validate(encoder, decoder, validation_dataloader,
                                    criterion, device, epoch, num_epochs,
                                    summary, loss_display_interval)

            is_best = max_loss > current_loss
            max_loss = min(max_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, encoder, decoder,
                            encoder_optimizer, decoder_optimizer, is_best,
                            current_loss, base_name)

    else:
        # This branch is used for visualization purposes.
        print('testing...')
        encoder.to(device)
        decoder.to(device)
        encoder.eval()
        decoder.eval()
        for batch_idx, data in enumerate(validation_dataloader):
            sequence = data['sequence'][0].unsqueeze(0).permute(1, 0,
                                                                2).to(device)
            seq_len = data['sequence_length'][0]
            x, (hidden_state, cell_state) = encoder(sequence)
            prediction = decoder(hidden_state)

            sequence = sequence.squeeze(1).detach().cpu().numpy()
            prediction = prediction.squeeze(1).detach().cpu().numpy()

            print(sequence.shape)
            hip_angles_gt = sequence[:seq_len, [0, 3]]
            knee_angles_gt = sequence[:seq_len, [1, 4]]
            ankle_angles_gt = sequence[:seq_len, [2, 5]]

            hip_angles_pred = prediction[:seq_len, [0, 3]]
            knee_angles_pred = prediction[:seq_len, [1, 4]]
            ankle_angles_pred = prediction[:seq_len, [2, 5]]

            time = np.arange(0, len(hip_angles_gt), 1)

            # fig, axs = plt.subplots(2)
            # fig.suptitle('Hip angle reconstruction')
            # axs[0].plot(time, hip_angles_gt[:,0])
            # axs[0].set_title('Left hip ground truth')
            # axs[1].plot(time, hip_angles_pred[:,0])
            # axs[1].set_title('Left hip prediction')

            plt.plot(time, ankle_angles_gt[:, 1], label='Ground truth')
            plt.plot(time, ankle_angles_pred[:, 1], label='Prediction')
            plt.title('Right-ankle angle reconstruction')
            plt.legend()

            # axs[0].plot(time, hip_angles_gt[:,1])
            # axs[0].set_title('Right hip ground truth')
            # axs[1].plot(time, hip_angles_pred[:,1])
            # axs[1].set_title('Right hip prediction')

            # fig.suptitle('Knee angle reconstruction')
            # axs[0].plot(time, knee_angles_gt[:,0])
            # axs[0].set_title('Left knee ground truth')
            # axs[1].plot(time, knee_angles_pred[:,0])
            # axs[1].set_title('Left knee prediction')
            # axs[0].plot(time, knee_angles_gt[:,1])
            # axs[0].set_title('Right knee ground truth')
            # axs[1].plot(time, knee_angles_pred[:,1])
            # axs[1].set_title('Right knee prediction')

            # fig.suptitle('Ankle angle reconstruction')
            # axs[0].plot(time, ankle_angles_gt[:,0])
            # axs[0].set_title('Left ankle ground truth')
            # axs[1].plot(time, ankle_angles_pred[:,0])
            # axs[1].set_title('Left ankle prediction')
            # axs[0].plot(time, ankle_angles_gt[:,1])
            # axs[0].set_title('Right ankle ground truth')
            # axs[1].plot(time, ankle_angles_pred[:,1])
            # axs[1].set_title('Right ankle prediction')

            plt.show()

            break