Example #1
    def train_from_torch(self, batch):
        obs = batch[self._obs_key]
        action = batch[self._action_key]
        next_obs = batch[self._next_obs_key]
        goal = batch[self._goal_key]
        achieved = self._state_to_goal_fn(next_obs)
        not_reached = (torch.norm(
            (achieved - goal), dim=1) > self.equality_threshold)

        not_reached_logit = self.model(obs, action, return_logits=True)
        not_reached_logit = not_reached_logit[:, 0]
        loss = self._criterion(not_reached_logit, not_reached.to(torch.float))

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self._need_to_update_eval_statistics:
            self._need_to_update_eval_statistics = False
            self._eval_statistics['loss'] = np.mean(ptu.get_numpy(loss))
            not_reached_predicted = torch.sigmoid(not_reached_logit)
            self._eval_statistics.update(
                create_stats_ordered_dict(
                    'discount_predicted',
                    ptu.get_numpy(not_reached_predicted),
                ))
            self._eval_statistics.update(
                create_stats_ordered_dict('not_reached/mean',
                                          np.mean(ptu.get_numpy(not_reached))))
Example #2
    def _statistics_from_batch(self, batch, stat_prefix):
        statistics = OrderedDict()

        train_dict = self.get_train_dict(batch)
        for name in [
            'Policy Loss',
        ]:
            tensor = train_dict[name]
            statistics_name = "{} {} Mean".format(stat_prefix, name)
            statistics[statistics_name] = np.mean(ptu.get_numpy(tensor))

        for name in [
            'QF Outputs',
            'Policy Actions',
        ]:
            tensor = train_dict[name]
            statistics.update(create_stats_ordered_dict(
                '{} {}'.format(stat_prefix, name),
                ptu.get_numpy(tensor)
            ))

        statistics.update(create_stats_ordered_dict(
            "{} Env Actions".format(stat_prefix),
            ptu.get_numpy(batch['actions'])
        ))

        return statistics
Example #3
File: high_low.py  Project: jcoreyes/erl
    def log_diagnostics(self, paths):
        final_values = []
        final_unclipped_rewards = []
        final_rewards = []
        for path in paths:
            final_value = path["actions"][-1][0]
            final_values.append(final_value)
            score = path["observations"][0][0] * final_value
            final_unclipped_rewards.append(score)
            final_rewards.append(clip_magnitude(score, 1))

        last_statistics = OrderedDict()
        last_statistics.update(
            create_stats_ordered_dict(
                'Final Value',
                final_values,
            ))
        last_statistics.update(
            create_stats_ordered_dict(
                'Unclipped Final Rewards',
                final_unclipped_rewards,
            ))
        last_statistics.update(
            create_stats_ordered_dict(
                'Final Rewards',
                final_rewards,
            ))

        for key, value in last_statistics.items():
            logger.record_tabular(key, value)

        return final_unclipped_rewards
Example #4
 def log_diagnostics(self, paths, logger=default_logger):
     statistics = OrderedDict()
     for name_in_env_infos, name_to_log in [
         ('distance_to_target', 'Distance to Target'),
         ('speed', 'Speed'),
         ('distance_reward', 'Distance Reward'),
         ('action_reward', 'Action Reward'),
     ]:
         stat = get_stat_in_paths(paths, 'env_infos', name_in_env_infos)
         statistics.update(create_stats_ordered_dict(
             name_to_log,
             stat,
         ))
     distances = get_stat_in_paths(paths, 'env_infos', 'distance_to_target')
     statistics.update(
         create_stats_ordered_dict(
             "Final Distance to Target",
             [ds[-1] for ds in distances],
         ))
     statistics.update(
         create_stats_ordered_dict(
             "Path Lengths",
             get_path_lengths(paths),
         ))
     for key, value in statistics.items():
         logger.record_tabular(key, value)
Example #5
 def _statistics_from_paths(self, paths, stat_prefix):
     eval_replay_buffer = UpdatableSubtrajReplayBuffer(
         len(paths) * (self.max_path_length + 1),
         self.env,
         self.subtraj_length,
         self.memory_dim,
     )
     for path in paths:
         eval_replay_buffer.add_trajectory(path)
     raw_subtraj_batch = eval_replay_buffer.get_all_valid_subtrajectories()
     assert raw_subtraj_batch is not None
     subtraj_batch = create_torch_subtraj_batch(raw_subtraj_batch)
     if self.save_memory_gradients:
         subtraj_batch['memories'].requires_grad = True
     statistics = self._statistics_from_subtraj_batch(
         subtraj_batch, stat_prefix=stat_prefix)
     statistics.update(
         eval_util.get_generic_path_information(
             paths,
             stat_prefix="Test",
         ))
     env_actions = np.vstack(
         [path["actions"][:, :self.action_dim] for path in paths])
     writes = np.vstack(
         [path["actions"][:, self.action_dim:] for path in paths])
     statistics.update(
         create_stats_ordered_dict('Env Actions',
                                   env_actions,
                                   stat_prefix=stat_prefix))
     statistics.update(
         create_stats_ordered_dict('Writes',
                                   writes,
                                   stat_prefix=stat_prefix))
     return statistics
Example #6
    def debug_statistics(self):
        """
        Given an image $$x$$, sample a batch of latents $$z_i$$ from the
        prior and decode them into reconstructions $$\hat x_i$$.
        Compare these to $$\hat x$$, the reconstruction of $$x$$.
        Ideally:
         - Every $$\hat x_i$$ does worse than $$\hat x$$ (checks that the
           VAE isn't ignoring the latent).
         - Some $$\hat x_i$$ do better than other $$\hat x_i$$ (checks for
           coverage).
        """
        debug_batch_size = 64
        data = self.get_batch(train=False)
        reconstructions, _, _ = self.model(data)
        img = data[0]
        recon_mse = ((reconstructions[0] - img)**2).mean().view(-1)
        img_repeated = img.expand((debug_batch_size, img.shape[0]))

        samples = ptu.randn(debug_batch_size, self.representation_size)
        random_imgs, _ = self.model.decode(samples)
        random_mses = (random_imgs - img_repeated)**2
        mse_improvement = ptu.get_numpy(random_mses.mean(dim=1) - recon_mse)
        stats = create_stats_ordered_dict(
            'debug/MSE improvement over random',
            mse_improvement,
        )
        stats.update(
            create_stats_ordered_dict(
                'debug/MSE of random decoding',
                ptu.get_numpy(random_mses),
            ))
        stats['debug/MSE of reconstruction'] = ptu.get_numpy(recon_mse)[0]
        return stats
Example #7
    def log_diagnostics(self, paths, **kwargs):
        list_of_rewards, terminals, obs, actions, next_obs = split_paths(paths)

        returns = []
        for rewards in list_of_rewards:
            returns.append(np.sum(rewards))
        statistics = OrderedDict()
        statistics.update(
            create_stats_ordered_dict(
                'Undiscounted Returns',
                returns,
            ))
        statistics.update(
            create_stats_ordered_dict(
                'Rewards',
                list_of_rewards,
            ))
        statistics.update(create_stats_ordered_dict(
            'Actions',
            actions,
        ))

        fraction_of_time_on_platform = [o[1] for o in obs]
        statistics['Fraction of time on platform'] = np.mean(
            fraction_of_time_on_platform)

        for key, value in statistics.items():
            logger.record_tabular(key, value)
        return returns
Example #8
    def log_diagnostics(self, paths, **kwargs):
        list_of_rewards, terminals, obs, actions, next_obs = split_paths(paths)

        returns = []
        for rewards in list_of_rewards:
            returns.append(np.sum(rewards))
        last_statistics = OrderedDict()
        last_statistics.update(
            create_stats_ordered_dict(
                'UndiscountedReturns',
                returns,
            ))
        last_statistics.update(
            create_stats_ordered_dict(
                'Rewards',
                list_of_rewards,
            ))
        last_statistics.update(create_stats_ordered_dict(
            'Actions',
            actions,
        ))

        for key, value in last_statistics.items():
            logger.record_tabular(key, value)
        return returns
Example #9
    def __call__(self, paths: List[Path],
                 contexts: List[Context]) -> Diagnostics:
        goals = [c[self._desired_goal_key] for c in contexts]
        achieved_goals = [
            np.array([o[self._achieved_goal_key] for o in path['observations']])
            for path in paths
        ]

        statistics = OrderedDict()
        stat_to_lists = defaultdict(list)
        for achieved, goal in zip(achieved_goals, goals):
            difference = achieved - goal
            x_difference = difference[..., :1]
            y_difference = difference[..., 1:2]
            z_difference = difference[..., 2:3]
            joint_difference = difference[..., 3:6]
            stat_to_lists['x/distance'].append(
                np.linalg.norm(x_difference, axis=-1)
            )
            stat_to_lists['y/distance'].append(
                np.linalg.norm(y_difference, axis=-1)
            )
            stat_to_lists['z/distance'].append(
                np.linalg.norm(z_difference, axis=-1)
            )
            stat_to_lists['joint/distance'].append(
                np.linalg.norm(joint_difference, axis=-1)
            )
            stat_to_lists['x/success'].append(
                np.linalg.norm(x_difference, axis=-1)
                <= self.success_threshold
            )
            stat_to_lists['y/success'].append(
                np.linalg.norm(y_difference, axis=-1)
                <= self.success_threshold
            )
            stat_to_lists['z/success'].append(
                np.linalg.norm(z_difference, axis=-1)
                <= self.success_threshold
            )
            stat_to_lists['joint/success'].append(
                np.linalg.norm(joint_difference, axis=-1)
                <= self.success_threshold
            )
        for stat_name, stat_list in stat_to_lists.items():
            statistics.update(create_stats_ordered_dict(
                stat_name,
                stat_list,
                always_show_all_stats=True,
            ))
            statistics.update(create_stats_ordered_dict(
                '{}/final'.format(stat_name),
                [s[-1:] for s in stat_list],
                always_show_all_stats=True,
                exclude_max_min=True,
            ))
        return statistics
Example #10
    def compute_loss(self,
                     batch,
                     skip_statistics=False) -> Tuple[Loss, LossStatistics]:
        vae_terms = compute_vae_terms(self.vae, batch[self.data_key])
        kl = vae_terms.kl
        likelihood = vae_terms.likelihood
        set_loss = compute_set_loss(self.vae, batch[self.set_key])
        total_loss = (-likelihood + self._beta * kl +
                      self.set_loss_weight * set_loss)

        eval_statistics = OrderedDict()
        if not skip_statistics:
            eval_statistics['log_prob'] = np.mean(ptu.get_numpy(likelihood))
            eval_statistics['kl'] = np.mean(ptu.get_numpy(kl))
            eval_statistics['set_loss'] = np.mean(ptu.get_numpy(set_loss))
            eval_statistics['loss'] = np.mean(ptu.get_numpy(total_loss))
            eval_statistics['beta'] = self._beta
            for k, v in vae_terms.p_x_given_z.get_diagnostics().items():
                eval_statistics['p_x_given_z/{}'.format(k)] = v
            for k, v in vae_terms.q_z.get_diagnostics().items():
                eval_statistics['q_z_given_x/{}'.format(k)] = v
            for name, set_list in [
                ('eval', self.eval_sets),
                ('train', self.train_sets),
            ]:
                for set_i, set in enumerate(set_list):
                    vae_terms = compute_vae_terms(self.vae, set)
                    kl = vae_terms.kl
                    likelihood = vae_terms.likelihood
                    set_loss = compute_set_loss(self.vae, set)
                    eval_statistics['{}/set{}/log_prob'.format(name, set_i)] = (
                        np.mean(ptu.get_numpy(likelihood)))
                    eval_statistics['{}/set{}/kl'.format(name, set_i)] = (
                        np.mean(ptu.get_numpy(kl)))
                    eval_statistics['{}/set{}/set_loss'.format(name, set_i)] = (
                        np.mean(ptu.get_numpy(set_loss)))
                    set_prior = compute_prior(self.vae.encoder(set))
                    eval_statistics.update(
                        create_stats_ordered_dict(
                            '{}/set{}/learned_prior/mean'.format(name, set_i),
                            ptu.get_numpy(set_prior.mean)))
                    eval_statistics.update(
                        create_stats_ordered_dict(
                            '{}/set{}/learned_prior/stddev'.format(name, set_i),
                            ptu.get_numpy(set_prior.stddev)))
                    for k, v in vae_terms.p_x_given_z.get_diagnostics().items():
                        eval_statistics['{}/set{}/p_x_given_z/{}'.format(
                            name, set_i, k)] = v
                    for k, v in vae_terms.q_z.get_diagnostics().items():
                        eval_statistics['{}/set{}/q_z_given_x/{}'.format(
                            name, set_i, k)] = v

        return total_loss, eval_statistics
Example #11
    def _do_training(self):
        tmp_batch = self.get_batch()
        random_state = tmp_batch['observations']

        losses = []
        batch = self.get_batch()
        obs = batch['observations']
        actions = batch['actions']
        next_obs = batch['next_observations']
        ob_deltas_pred = self.model(obs, actions)

        next_obs_pred = obs + ob_deltas_pred
        if self.vectorized:
            distance_to_random_state_pred = ((next_obs_pred - random_state)**2)
            distance_to_random_state = ((next_obs - random_state)**2)

            squared_errors = (distance_to_random_state_pred -
                              distance_to_random_state)**2
            loss = squared_errors.mean()
        else:
            distance_to_random_state_pred = ((next_obs_pred -
                                              random_state)**2).sum(
                                                  1, keepdim=True)
            distance_to_random_state = ((next_obs - random_state)**2).sum(
                1, keepdim=True)

            squared_errors = (distance_to_random_state_pred -
                              distance_to_random_state)**2
            loss = squared_errors.mean()

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        losses.append(ptu.get_numpy(loss))

        if self.eval_statistics is None:
            self.eval_statistics = OrderedDict()
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Model Loss',
                    losses,
                    always_show_all_stats=True,
                    exclude_max_min=True,
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Distance To Random State',
                    ptu.get_numpy(distance_to_random_state),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Distance To Random State Predicted',
                    ptu.get_numpy(distance_to_random_state_pred),
                ))
Example #12
    def _do_training(self):
        batch = self.get_batch()
        """
        Optimize Critic/Actor.
        """
        rewards = batch['rewards']
        terminals = batch['terminals']
        obs = batch['observations']
        actions = batch['actions']
        next_obs = batch['next_observations']

        _, _, v_pred = self.target_policy(next_obs, None)
        y_target = self.reward_scale * rewards + (
            1. - terminals) * self.discount * v_pred
        y_target = y_target.detach()
        mu, y_pred, v = self.policy(obs, actions)
        policy_loss = self.policy_criterion(y_pred, y_target)

        self.policy_optimizer.zero_grad()
        policy_loss.backward()
        self.policy_optimizer.step()
        """
        Update Target Networks
        """
        if self.use_soft_update:
            ptu.soft_update_from_to(self.policy, self.target_policy, self.tau)
        else:
            if self._n_train_steps_total % self.target_hard_update_period == 0:
                ptu.copy_model_params_from_to(self.policy, self.target_policy)

        if self.need_to_update_eval_statistics:
            self.need_to_update_eval_statistics = False
            self.eval_statistics['Policy Loss'] = np.mean(
                ptu.get_numpy(policy_loss))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Policy v',
                    ptu.get_numpy(v),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Policy mu',
                    ptu.get_numpy(mu),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Y targets',
                    ptu.get_numpy(y_target),
                ))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Y predictions',
                    ptu.get_numpy(y_pred),
                ))
Example #13
 def get_diagnostics(self):
     stats = OrderedDict()
     stats.update(
         create_stats_ordered_dict(
             'mean',
             ptu.get_numpy(self.mean),
             # exclude_max_min=True,
         ))
     stats.update(
         create_stats_ordered_dict(
             'std',
             ptu.get_numpy(self.distribution.stddev),
         ))
     return stats
Example #14
    def _do_training(self):
        batch = self.get_batch()
        obs = batch['observations']
        actions = batch['actions']
        next_obs = batch['next_observations']
        """
        Policy operations.
        """
        inputs = torch.cat((obs, self.env.convert_obs_to_goals(next_obs)),
                           dim=1)
        policy_actions = self.policy(inputs)
        policy_loss = self.policy_criterion(policy_actions, actions)
        """
        Update Networks
        """
        self.policy_optimizer.zero_grad()
        policy_loss.backward()
        self.policy_optimizer.step()

        if self.need_to_update_eval_statistics:
            self.need_to_update_eval_statistics = False
            """
            This way, these statistics are only computed for one batch.
            """
            self.eval_statistics = OrderedDict()
            self.eval_statistics['Policy Loss'] = np.mean(
                ptu.get_numpy(policy_loss))
            self.eval_statistics.update(
                create_stats_ordered_dict(
                    'Policy Action',
                    ptu.get_numpy(policy_actions),
                ))
Example #15
 def get_diagnostics(self):
     stats = OrderedDict()
     stats.update(
         create_stats_ordered_dict(
             'mean',
             ptu.get_numpy(self.mean),
         ))
     stats.update(
         create_stats_ordered_dict('normal/std',
                                   ptu.get_numpy(self.normal_std)))
     stats.update(
         create_stats_ordered_dict(
             'normal/log_std',
             ptu.get_numpy(torch.log(self.normal_std)),
         ))
     return stats
Example #16
 def log_diagnostics(self, paths, logger=default_logger):
     lms = get_stat_in_paths(paths, 'agent_infos', 'lagrange_multiplier')
     for key, value in create_stats_ordered_dict(
             "TDM LBFGS Lagrange Multiplier",
             lms,
     ).items():
         logger.record_tabular(key, value)
Example #17
    def _statistics_from_subtraj_batch(self, subtraj_batch, stat_prefix=''):
        statistics = OrderedDict()

        critic_dict = self.get_critic_output_dict(subtraj_batch)
        for name, tensor in critic_dict.items():
            statistics.update(
                create_stats_ordered_dict('{} QF {}'.format(stat_prefix, name),
                                          ptu.get_numpy(tensor)))

        policy_dict = self.get_policy_output_dict(subtraj_batch)
        for name, tensor in policy_dict.items():
            statistics.update(
                create_stats_ordered_dict(
                    '{} Policy {}'.format(stat_prefix, name),
                    ptu.get_numpy(tensor)))
        return statistics
Example #18
 def save_gradient_norm(gradient):
     if self.need_to_update_eval_statistics:
         self.extra_eval_statistics.update(
             create_stats_ordered_dict(
                 key,
                 ptu.get_numpy(gradient.data.norm(p=2, dim=1)),
                 always_show_all_stats=True,
             ))
Example #19
    def __call__(self, paths: List[Path],
                 contexts: List[Context]) -> Diagnostics:
        goals = [c[self._desired_goal_key] for c in contexts]
        achieved_goals = [
            np.array(
                [o[self._achieved_goal_key] for o in path['observations']])
            for path in paths
        ]

        statistics = OrderedDict()
        stat_to_lists = defaultdict(list)
        for achieved, goal in zip(achieved_goals, goals):
            difference = achieved - goal
            distance = np.linalg.norm(difference, axis=-1)
            stat_to_lists['distance'].append(distance)
            stat_to_lists['success'].append(
                distance <= self._success_threshold)
        for stat_name, stat_list in stat_to_lists.items():
            statistics.update(
                create_stats_ordered_dict(
                    stat_name,
                    stat_list,
                    always_show_all_stats=True,
                ))
            statistics.update(
                create_stats_ordered_dict(
                    '{}/final'.format(stat_name),
                    [s[-1:] for s in stat_list],
                    always_show_all_stats=True,
                    exclude_max_min=True,
                ))
        statistics.update(
            create_stats_ordered_dict(
                '{}/initial'.format('distance'),
                [s[:1] for s in stat_to_lists['distance']],
                always_show_all_stats=True,
                exclude_max_min=True,
            ))
        statistics.update(
            create_stats_ordered_dict(
                '{}/any'.format('success'),
                [any(s) for s in stat_to_lists['success']],
                always_show_all_stats=True,
                exclude_max_min=True,
            ))
        return statistics
Example #20
 def get_diagnostics(self):
     stats = OrderedDict()
     stats.update(
         create_stats_ordered_dict(
             'probability',
             ptu.get_numpy(self.probs),
         ))
     return stats
Example #21
File: reacher_env.py  Project: jcoreyes/erl
 def log_diagnostics(self, paths, logger=default_logger):
     statistics = OrderedDict()
     for name_in_env_infos, name_to_log in [
         ('reward_dist', 'Distance Reward'),
         ('reward_ctrl', 'Action Reward'),
     ]:
         stat = get_stat_in_paths(paths, 'env_infos', name_in_env_infos)
         statistics.update(create_stats_ordered_dict(
             name_to_log,
             stat,
         ))
     distances = get_stat_in_paths(paths, 'env_infos', 'reward_dist')
     statistics.update(create_stats_ordered_dict(
         "Final Distance Reward",
         [ds[-1] for ds in distances],
     ))
     for key, value in statistics.items():
         logger.record_tabular(key, value)
Example #22
 def get_diagnostics(self):
     stats = OrderedDict()
     stats.update(
         create_stats_ordered_dict(
             'alpha',
             ptu.get_numpy(self.concentration0),
         ))
     stats.update(
         create_stats_ordered_dict(
             'beta',
             ptu.get_numpy(self.concentration1),
         ))
     stats.update(
         create_stats_ordered_dict(
             'entropy',
             ptu.get_numpy(self.entropy()),
         ))
     return stats
Example #23
 def log_diagnostics(self, paths, logger=default_logger):
     statistics = OrderedDict()
     for name_in_env_infos, name_to_log in [
         ('posafter', 'Position'),
         ('height', 'Height'),
         ('angle', 'Angle'),
     ]:
         stats = get_stat_in_paths(paths, 'env_infos', name_in_env_infos)
         statistics.update(create_stats_ordered_dict(
             name_to_log,
             stats,
         ))
         statistics.update(
             create_stats_ordered_dict(
                 "Final " + name_to_log,
                 [s[-1] for s in stats],
             ))
     for key, value in statistics.items():
         logger.record_tabular(key, value)
Example #24
 def _compute_target_q_value(
     self,
     discount,
     rewards,
     terminals,
     bootstrap_value,
     statistics_log,
     update_statistics,
 ):
     scaled_rewards = rewards * self.reward_scale
     del rewards
     if self.reward_type == self.NORMAL_REWARD:
         reward_target = scaled_rewards
     elif self.reward_type == self.DISCOUNTED_REWARD:
         reward_target = scaled_rewards * (1 - discount)
     elif self.reward_type == self.DISCOUNTED_PLUS_TIME_KL:
         kl_reward = kl_divergence(
             Bernoulli(discount),
             self.prior_on_discount,
         )
         reward_target = (scaled_rewards * (1 - discount) + kl_reward)
         if update_statistics:
             statistics_log.update(
                 create_stats_ordered_dict(
                     'time_kl_reward',
                     ptu.get_numpy(kl_reward),
                 ))
             statistics_log.update(
                 create_stats_ordered_dict(
                     'inferred_discount',
                     ptu.get_numpy(discount),
                 ))
     else:
         raise ValueError("Unknown reward type: {}".format(self.reward_type))
     if self._multiply_bootstrap_by_prior_discount:
         bootstrap_target = ((1. - terminals) * discount * bootstrap_value *
                             self.discount)
     else:
         bootstrap_target = ((1. - terminals) * discount * bootstrap_value)
     q_target = reward_target + bootstrap_target
     return q_target
Example #25
    def log_diagnostics(self, paths):
        target_onehots = []
        for path in paths:
            first_observation = path["observations"][0][:self.n+1]
            target_onehots.append(first_observation)

        final_predictions = []  # each element has shape (dim)
        nonfinal_predictions = []  # each element has shape (seq_length-1, dim)
        for path in paths:
            actions = path["actions"]
            if self._softmax_action:
                actions = softmax(actions, axis=-1)
            final_predictions.append(actions[-1])
            nonfinal_predictions.append(actions[:-1])
        nonfinal_predictions_sequence_dimension_flattened = np.vstack(
            nonfinal_predictions
        )  # shape = N X dim
        nonfinal_prob_zero = [probs[0] for probs in
                              nonfinal_predictions_sequence_dimension_flattened]
        final_probs_correct = []
        for final_prediction, target_onehot in zip(final_predictions,
                                                   target_onehots):
            correct_pred_idx = np.argmax(target_onehot)
            final_probs_correct.append(final_prediction[correct_pred_idx])
        final_prob_zero = [probs[0] for probs in final_predictions]

        last_statistics = OrderedDict()
        last_statistics.update(create_stats_ordered_dict(
            'Final P(correct)',
            final_probs_correct))
        last_statistics.update(create_stats_ordered_dict(
            'Non-final P(zero)',
            nonfinal_prob_zero))
        last_statistics.update(create_stats_ordered_dict(
            'Final P(zero)',
            final_prob_zero))

        for key, value in last_statistics.items():
            logger.record_tabular(key, value)

        return final_probs_correct
Example #26
 def get_diagnostics(self, paths):
     statistics = OrderedDict()
     for stat_name_in_paths, stat_name_to_print in [
         ('arm_object_distance', 'Distance hand to object'),
         ('arm_goal_distance', 'Distance hand to goal'),
     ]:
         stats = get_stat_in_paths(paths, 'env_infos', stat_name_in_paths)
         statistics.update(
             create_stats_ordered_dict(
                 stat_name_to_print,
                 stats,
                 always_show_all_stats=True,
             ))
         final_stats = [s[-1] for s in stats]
         statistics.update(
             create_stats_ordered_dict(
                 "Final " + stat_name_to_print,
                 final_stats,
                 always_show_all_stats=True,
             ))
     return statistics
Example #27
    def compute_loss(self, batch, epoch=-1, test=False):
        prefix = "test/" if test else "train/"

        positives = self.positives.random_batch(
            self.batch_size)["observations"]
        P, feature_size = positives.shape
        positives = ptu.from_numpy(positives)
        negatives = batch['observations']
        N, feature_size = negatives.shape

        X = torch.cat((positives, negatives))
        Y = np.zeros((P + N, 1))
        Y[:P, 0] = 1
        # Y[P:, 0] = 0

        # X = ptu.from_numpy(X)
        Y = ptu.from_numpy(Y)
        y_pred = self.GAIL_discriminator_logits(X)

        loss = self.loss_fn(y_pred, Y)

        y_pred_class = (y_pred > 0).float()

        self.update_with_classification_stats(y_pred_class, Y, prefix)
        self.eval_statistics.update(
            create_stats_ordered_dict(
                "y_pred_positives",
                ptu.get_numpy(y_pred[:P]),
            ))
        self.eval_statistics.update(
            create_stats_ordered_dict(
                "y_pred_negatives",
                ptu.get_numpy(y_pred[P:]),
            ))

        self.eval_statistics['epoch'] = epoch
        self.eval_statistics[prefix + "losses"].append(loss.item())

        return loss
Example #28
 def get_diagnostics(self):
     path_lens = [len(path['actions']) for path in self._epoch_paths]
     stats = OrderedDict([
         ('num steps total', self._num_steps_total),
         ('num paths total', self._num_paths_total),
     ])
     stats.update(
         create_stats_ordered_dict(
             "path length",
             path_lens,
             always_show_all_stats=True,
         ))
     return stats
Example #29
 def get_diagnostics(self):
     if self._vae_sample_probs is None or self._vae_sample_priorities is None:
         stats = create_stats_ordered_dict(
             'VAE Sample Weights',
             np.zeros(self._size),
         )
         stats.update(create_stats_ordered_dict(
             'VAE Sample Probs',
             np.zeros(self._size),
         ))
     else:
         vae_sample_priorities = self._vae_sample_priorities[:self._size]
         vae_sample_probs = self._vae_sample_probs[:self._size]
         stats = create_stats_ordered_dict(
             'VAE Sample Weights',
             vae_sample_priorities,
         )
         stats.update(create_stats_ordered_dict(
             'VAE Sample Probs',
             vae_sample_probs,
         ))
     return stats
Example #30
 def get_diagnostics(self):
     stats = OrderedDict()
     stats.update(
         create_stats_ordered_dict(
             'mean',
             ptu.get_numpy(self.mean),
         ))
     stats.update(
         create_stats_ordered_dict(
             'std',
             ptu.get_numpy(self.distribution.stddev),
         ))
     stats.update(
         create_stats_ordered_dict(
             'log_std',
             ptu.get_numpy(torch.log(self.distribution.stddev)),
         ))
     stats.update(
         create_stats_ordered_dict(
             'entropy',
             ptu.get_numpy(self.entropy()),
         ))
     return stats
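
Every example above passes NumPy data (arrays, scalars, or lists of per-path arrays) to create_stats_ordered_dict and merges the returned OrderedDict into its diagnostics. The helper itself is not shown on this page, so here is a minimal sketch of a call-compatible stand-in, assuming it only reports Mean/Std/Max/Min summaries; the name create_stats_ordered_dict_sketch, its keyword handling, and the flattening behavior are illustrative assumptions rather than the project's actual implementation.

from collections import OrderedDict

import numpy as np


def create_stats_ordered_dict_sketch(name, data, stat_prefix=None,
                                     always_show_all_stats=True,
                                     exclude_max_min=False):
    # Summarize `data` (a scalar, an array, or a list of per-path arrays)
    # under the keys "<name> Mean", "<name> Std" and, unless excluded,
    # "<name> Max" / "<name> Min".
    # `always_show_all_stats` is accepted only for call-compatibility with
    # the examples above; this sketch ignores it.
    if stat_prefix is not None:
        name = "{} {}".format(stat_prefix, name)
    if isinstance(data, (list, tuple)):
        data = np.concatenate([np.ravel(np.asarray(d)) for d in data])
    data = np.asarray(data, dtype=np.float64)
    stats = OrderedDict([
        (name + ' Mean', np.mean(data)),
        (name + ' Std', np.std(data)),
    ])
    if not exclude_max_min:
        stats[name + ' Max'] = np.max(data)
        stats[name + ' Min'] = np.min(data)
    return stats

For instance, create_stats_ordered_dict_sketch('Rewards', [np.array([1., 2.]), np.array([3.])]) returns an OrderedDict with 'Rewards Mean', 'Rewards Std', 'Rewards Max', and 'Rewards Min' entries, which the examples above then merge into their diagnostics dicts or feed to logger.record_tabular.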