Code example #1
def _reconstruct_img(self, flat_img):
    # Encode the flattened image, decode the latent mean, and return the
    # reconstruction reshaped back to (channels, imsize, imsize).
    latent_distribution_params = self.vae.encode(
        ptu.from_numpy(flat_img.reshape(1, -1)))
    reconstructions, _ = self.vae.decode(latent_distribution_params[0])
    imgs = ptu.get_numpy(reconstructions)
    imgs = imgs.reshape(1, self.input_channels, self.imsize, self.imsize)
    return imgs[0]
Code example #2
def _update_info(self, info, obs):
    # Encode the current observation and compare its latent mean with the latent goal.
    latent_distribution_params = self.vae.encode(
        ptu.from_numpy(obs[self.vae_input_observation_key].reshape(1, -1)))
    latent_obs, logvar = ptu.get_numpy(latent_distribution_params[0])[0], \
                         ptu.get_numpy(latent_distribution_params[1])[0]
    # assert (latent_obs == obs['latent_observation']).all()
    latent_goal = self.desired_goal['latent_desired_goal']
    dist = latent_goal - latent_obs
    var = np.exp(logvar.flatten())
    var = np.maximum(var, self.reward_min_variance)
    err = dist * dist / 2 / var
    mdist = np.sum(err)  # half the squared Mahalanobis distance (diagonal covariance)
    info["vae_mdist"] = mdist
    info["vae_success"] = 1 if mdist < self.epsilon else 0
    info["vae_dist"] = np.linalg.norm(dist, ord=self.norm_order)
    info["vae_dist_l1"] = np.linalg.norm(dist, ord=1)
    info["vae_dist_l2"] = np.linalg.norm(dist, ord=2)
Code example #3
def reconstruct_img(flat_img):
    latent_distribution_params = vae.encode(
        ptu.from_numpy(flat_img.reshape(1, -1)).cuda())
    reconstructions, _ = vae.decode(latent_distribution_params[0])
    imgs = ptu.get_numpy(reconstructions)
    imgs = imgs.reshape(1, vae.input_channels, vae.imsize,
                        vae.imsize).transpose(0, 3, 2, 1)  # BCWH -> BHWC
    img = cv2.cvtColor(imgs[0], cv2.COLOR_RGB2BGR)
    return img
Code example #4
def get_latent(raw_image):
    """Get latent variables (mean vector)"""
    image = cv2.resize(raw_image, (vae.imsize, vae.imsize))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = normalize_image(image)
    # swap order and reshape
    flat_img = torch.from_numpy(image).permute(2, 1,
                                               0).flatten(start_dim=1).numpy()
    latent_distribution_params = vae.encode(
        ptu.from_numpy(flat_img.reshape(1, -1)).cuda())
    latents = ptu.get_numpy(latent_distribution_params[0])
    return latents
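
Examples #3 and #4 pair naturally: get_latent turns a raw OpenCV frame into the VAE's latent mean, while reconstruct_img maps a flattened, normalized image back to a BGR image for display. A hedged usage sketch, assuming a trained vae, rlkit's pytorch_util as ptu, and the two helpers above are in scope; "frame.png" is a hypothetical input file:

import cv2
import numpy as np

raw = cv2.imread("frame.png")          # BGR uint8 frame from OpenCV
latents = get_latent(raw)              # shape (1, representation_size): latent means

# reconstruct_img expects the flattened, normalized layout that get_latent builds
# internally; a random image stands in here for a properly preprocessed frame.
flat_img = np.random.rand(vae.imsize * vae.imsize * vae.input_channels).astype(np.float32)
recon_bgr = reconstruct_img(flat_img)  # decoded image in OpenCV's BGR order
cv2.imwrite("recon.png", (np.clip(recon_bgr, 0.0, 1.0) * 255).astype(np.uint8))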
Code example #5
File: core.py  Project: DrawZeroPoint/RoRL
def np_ify(tensor_or_other):
    # Convert a torch tensor to NumPy; pass anything else through unchanged.
    # (torch.autograd.Variable has been merged into Tensor since PyTorch 0.4,
    # so this isinstance check also matches plain tensors.)
    if isinstance(tensor_or_other, torch.autograd.Variable):
        return ptu.get_numpy(tensor_or_other)
    else:
        return tensor_or_other
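
Every snippet here funnels data through rlkit's pytorch_util (ptu) helpers. Roughly, ptu.from_numpy builds a float tensor on the configured device and ptu.get_numpy detaches a tensor, moves it to the CPU, and converts it to NumPy. A minimal sketch of that behaviour (not rlkit's exact implementation):

import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def from_numpy(np_array):
    # Float tensor on the active device, roughly what ptu.from_numpy does.
    return torch.from_numpy(np.asarray(np_array)).float().to(device)

def get_numpy(tensor):
    # Detach from the autograd graph, move to CPU, convert to NumPy.
    return tensor.detach().cpu().numpy()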
Code example #6
def _encode(self, imgs):
    # Return the latent means (first element of the encoder output) as a NumPy array.
    latent_distribution_params = self.vae.encode(ptu.from_numpy(imgs))
    return ptu.get_numpy(latent_distribution_params[0])
Code example #7
def _decode(self, latents):
    # Decode latent vectors back to (flattened) images and return them as NumPy.
    reconstructions, _ = self.vae.decode(ptu.from_numpy(latents))
    decoded = ptu.get_numpy(reconstructions)
    return decoded
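
Taken together, _encode and _decode give a NumPy-in / NumPy-out round trip through the VAE. A hedged usage sketch, assuming a wrapper instance env exposing the two methods above and a 48x48 RGB VAE (the image size is an assumption, not fixed by the snippets):

import numpy as np

# Batch of 4 flattened, normalized images in [0, 1].
flat_imgs = np.random.rand(4, 48 * 48 * 3).astype(np.float32)

latents = env._encode(flat_imgs)   # (4, representation_size) latent means
recons = env._decode(latents)      # (4, 48 * 48 * 3) reconstructed flat images
assert recons.shape == flat_imgs.shape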
Code example #8
File: td3.py  Project: DrawZeroPoint/RoRL
    def train_from_torch(self, batch):
        rewards = batch['rewards']
        terminals = batch['terminals']
        obs = batch['observations']
        actions = batch['actions']
        next_obs = batch['next_observations']
        """
        Critic operations.
        """

        next_actions = self.target_policy(next_obs)
        noise = ptu.randn(next_actions.shape) * self.target_policy_noise
        noise = torch.clamp(noise, -self.target_policy_noise_clip,
                            self.target_policy_noise_clip)
        noisy_next_actions = next_actions + noise

        target_q1_values = self.target_qf1(next_obs, noisy_next_actions)
        target_q2_values = self.target_qf2(next_obs, noisy_next_actions)
        target_q_values = torch.min(target_q1_values, target_q2_values)
        q_target = self.reward_scale * rewards + (
            1. - terminals) * self.discount * target_q_values
        q_target = q_target.detach()

        q1_pred = self.qf1(obs, actions)
        bellman_errors_1 = (q1_pred - q_target)**2
        qf1_loss = bellman_errors_1.mean()

        q2_pred = self.qf2(obs, actions)
        bellman_errors_2 = (q2_pred - q_target)**2
        qf2_loss = bellman_errors_2.mean()
        """
        Update Networks
        """
        self.qf1_optimizer.zero_grad()
        qf1_loss.backward()
        self.qf1_optimizer.step()

        self.qf2_optimizer.zero_grad()
        qf2_loss.backward()
        self.qf2_optimizer.step()

        policy_actions = policy_loss = None
        if self._n_train_steps_total % self.policy_and_target_update_period == 0:
            policy_actions = self.policy(obs)
            q_output = self.qf1(obs, policy_actions)
            policy_loss = -q_output.mean()

            self.policy_optimizer.zero_grad()
            policy_loss.backward()
            self.policy_optimizer.step()

            ptu.soft_update_from_to(self.policy, self.target_policy, self.tau)
            ptu.soft_update_from_to(self.qf1, self.target_qf1, self.tau)
            ptu.soft_update_from_to(self.qf2, self.target_qf2, self.tau)

        if self._need_to_update_eval_statistics:
            self._need_to_update_eval_statistics = False
            if policy_loss is None:
                policy_actions = self.policy(obs)
                q_output = self.qf1(obs, policy_actions)
                policy_loss = -q_output.mean()

            self.eval_statistics['QF1 Loss'] = np.mean(ptu.get_numpy(qf1_loss))
            self.eval_statistics['QF2 Loss'] = np.mean(ptu.get_numpy(qf2_loss))
            self.eval_statistics['Policy Loss'] = np.mean(
                ptu.get_numpy(policy_loss))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Q1 Predictions',
                    ptu.get_numpy(q1_pred),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Q2 Predictions',
                    ptu.get_numpy(q2_pred),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Q Targets',
                    ptu.get_numpy(q_target),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Bellman Errors 1',
                    ptu.get_numpy(bellman_errors_1),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Bellman Errors 2',
                    ptu.get_numpy(bellman_errors_2),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Policy Action',
                    ptu.get_numpy(policy_actions),
                ))
        self._n_train_steps_total += 1
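
The target networks in this TD3 step are refreshed with ptu.soft_update_from_to, i.e. Polyak averaging of the parameters. A minimal sketch of such a soft update (rlkit's actual helper may differ in details):

import torch

def soft_update_from_to(source, target, tau):
    # target <- tau * source + (1 - tau) * target, parameter by parameter.
    with torch.no_grad():
        for target_param, param in zip(target.parameters(), source.parameters()):
            target_param.data.copy_(
                tau * param.data + (1.0 - tau) * target_param.data)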