    def _log_graph_distance_metrics(self, epoch):
        metric_logging.log_scalar_metrics(
            'graph', epoch, {
                'max_distance_from_start_in_history':
                self._max_distance_from_start,
                'min_distance_to_goal_in_history': self._min_distance_to_goal
            })
    def update_and_log(self, episodes, epoch):
        """

        Args:
            episodes: List of completed episodes (Agent/Trainer-dependent).
            epoch (int): epoch number
        """
        time_stamp = time.time()
        episode_min_goal_distance = []
        episode_max_start_distance = []
        for episode in episodes:
            distances_to_goal = list()
            distances_from_start = list()
            for observation in (
                    list(episode.transition_batch.observation) +
                    [episode.transition_batch.next_observation[-1]]):
                state = self._graph_env.obs2state(observation)
                distances_to_goal.append(
                    self._distance_to_goal.get(state, float('inf')))
                distances_from_start.append(self._distance_from_start[state])
            episode_min_goal_distance.append(min(distances_to_goal))
            episode_max_start_distance.append(max(distances_from_start))
        self._max_distance_from_start = max(self._max_distance_from_start,
                                            max(episode_max_start_distance))
        self._min_distance_to_goal = min(self._min_distance_to_goal,
                                         min(episode_min_goal_distance))
        self._log_graph_distance_metrics(epoch)
        metric_logging.log_scalar_metrics(
            'run_time', epoch,
            {'update_and_log_graph_distances': time.time() - time_stamp})
    def _run_agent_train_epoch(self):
        time_stamp_before_train = time.time()
        metrics = self._trainer.train_epoch(self._network)
        metric_logging.log_scalar_metrics(
            'run_time',
            self._epoch,
            {'agent_train': time.time() - time_stamp_before_train}
        )
        metric_logging.log_scalar_metrics(
            'agent_train',
            self._epoch,
            metrics
        )
    def _maybe_log_single_episode_visualization(self, episodes):
        if (
            self._log_rollout_every_n_epochs is not None and
            self._epoch % self._log_rollout_every_n_epochs == 0
        ):
            time_stamp_before_log = time.time()
            self._log_single_episode_visualization(episodes)
            time_diff = time.time() - time_stamp_before_log
            metric_logging.log_scalar_metrics(
                'run_time',
                self._epoch,
                {'single_rollout': time_diff}
            )
    def _maybe_log_transitions_visualizations(self, episodes):
        if (
            self._log_other_images_every_n_epochs is not None and
            self._epoch % self._log_other_images_every_n_epochs == 0
        ):
            time_stamp_before_log = time.time()
            self._log_hard_transitions_predictions_visualizations(episodes)
            self._log_top_priority_transitions()
            time_diff = time.time() - time_stamp_before_log
            metric_logging.log_scalar_metrics(
                'run_time',
                self._epoch,
                {'other_images': time_diff}
            )
    def _log_episodes_metrics_and_images(self, episodes):
        metric_logging.log_scalar_metrics(
            'agent',
            self._epoch,
            self._agent_class.compute_metrics(episodes)
        )
        metric_logging.log_scalar_metrics(
            'env',
            self._epoch,
            self._logging_env.compute_metrics(episodes)
        )
        metric_logging.log_scalar_metrics(
            'episode',
            self._epoch,
            self._compute_episode_metrics(episodes)
        )
        metric_logging.log_scalar_metrics(
            'episode_rolling',
            self._epoch,
            self._compute_metrics_across_epochs(episodes)
        )

        self._maybe_log_n_experienced_states(episodes)
        self._maybe_update_and_log_graph_distance_metrics(episodes)
        self._maybe_log_trainable_env_model_metrics_and_images(episodes)
        self._maybe_log_single_episode_visualization(episodes)
        self._maybe_log_agent_visit_heat_map(episodes)
        self._maybe_log_transitions_visualizations(episodes)
    def _maybe_log_agent_visit_heat_map(self, episodes):
        if (
            self._log_heat_map_every_n_epochs is not None and
            self._epoch % self._log_heat_map_every_n_epochs == 0
        ):
            time_stamp_before_log = time.time()
            self._log_agent_visit_heat_map(
                episodes, self._log_detailed_heat_map
            )
            time_diff = time.time() - time_stamp_before_log
            metric_logging.log_scalar_metrics(
                'run_time',
                self._epoch,
                {'heat_map': time_diff}
            )
    def _run_model_train_epoch(self, train_phase, train_epoch):
        time_stamp_before_train = time.time()
        model_train_metrics = self._model_trainer.train_epoch(
            self._model_network
        )
        time_diff = time.time() - time_stamp_before_train
        metric_logging.log_scalar_metrics(
            'run_time',
            train_epoch,
            {train_phase: time_diff}
        )
        metric_logging.log_scalar_metrics(
            train_phase,
            train_epoch,
            model_train_metrics
        )
    def _maybe_log_trainable_env_model_metrics_and_images(self, episodes):
        if self._model_network is not None:
            metrics = self._compute_trainable_model_metrics(episodes)
            metric_logging.log_scalar_metrics(
                'episode_model',
                self._epoch,
                metrics
            )

            if self._log_false_positive_images:
                time_stamp_before_log = time.time()
                self._log_transitions_with_false_positive_rewards(episodes)
                time_diff = time.time() - time_stamp_before_log
                metric_logging.log_scalar_metrics(
                    'run_time',
                    self._epoch,
                    {'false_positive_images': time_diff}
                )
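
The examples here all report metrics through metric_logging.log_scalar_metrics(prefix, step, metrics). Its implementation is not shown on this page; a minimal sketch of the behavior the call sites appear to assume (each dict entry logged as a separate scalar named 'prefix/key', via the log_scalar primitive used in the last example below) could look like this. Some calls also pass a metric-smoothing argument, which the sketch ignores.

# Minimal sketch only -- an assumption about what log_scalar_metrics does,
# not the library's actual implementation.
def log_scalar_metrics(prefix, step, metrics):
    for key, value in metrics.items():
        log_scalar(prefix + '/' + key, step=step, value=value)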
Example #10
    def _run_episode_batch(self):
        time_stamp_before_run_agent = time.time()
        if self._model_network is not None:
            model_params = self._model_network.params
        else:
            model_params = None
        episodes = self._batch_stepper.run_episode_batch(
            self._network.params,
            model_params,
            epoch=max(0, self._epoch),
            time_limit=self._episode_time_limit
        )
        metric_logging.log_scalar_metrics(
            'run_time',
            self._epoch,
            {'episode_batch': time.time() - time_stamp_before_run_agent}
        )
        return episodes
Example #11
    def hard_eval_run(self, env):
        print('Hard eval started')

        env.env.scrambleSize = 100
        env.env.step_limit = 150
        self._n_passes = 8000

        count = 0
        solved = 0
        while True:
            beg_time = time.time()
            episode = yield from self.solve(env, count, dummy=True, hard=True)
            run_time = time.time() - beg_time
            solved += int(episode.solved)
            count += 1

            metric_logging.log_scalar_metrics(
                'hard_eval', count, {
                    'solved': solved / count,
                    'solving time': run_time,
                    'steps': 1 - episode.return_
                })
Example #12
    def run_epoch(self):
        """Runs a single epoch."""
        episodes = self._batch_stepper.run_episode_batch(
            self._network.params,
            epoch=max(0, self._epoch - self._n_precollect_epochs),
            time_limit=self._episode_time_limit
        )
        self._total_episodes += len(episodes)
        metric_logging.log_scalar_metrics(
            'episode',
            self._epoch,
            self._compute_episode_metrics(episodes)
        )
        metric_logging.log_scalar_metrics(
            'agent',
            self._epoch,
            self._agent_class.compute_metrics(episodes)
        )

        new_time_stamp = time.time()
        time_diff = new_time_stamp - self.time_stamp
        self.time_stamp = new_time_stamp

        metric_logging.log_scalar_metrics(
            'agent',
            self._epoch,
            {'time': time_diff}
        )
        for episode in episodes:
            self._trainer.add_episode(episode)

        if self._epoch >= self._n_precollect_epochs:
            metrics = self._trainer.train_epoch(self._network)
            metric_logging.log_scalar_metrics(
                'train',
                self._epoch,
                metrics
            )

        if self._epoch == self._n_precollect_epochs:
            # Save gin operative config into a file. "Operative" means the part
            # that is actually used in the experiment. We need to run a full
            # epoch (data collection + training) first, so gin can figure that
            # out.
            self._save_gin()

        self._epoch += 1
        print('')
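
run_epoch is designed to be called repeatedly by an outer loop: the first _n_precollect_epochs calls only gather episodes, and training starts afterwards. A purely hypothetical driver (training_loop, runner and n_epochs are illustrative names, not taken from the snippet above) might look like this.

# Hypothetical usage sketch; the real entry point may differ.
def training_loop(runner, n_epochs):
    # runner is assumed to expose run_epoch() as defined above.
    for _ in range(n_epochs):
        runner.run_epoch()  # collects a batch; trains once past the precollect phase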
Example #13
    def update_and_log(self, episodes, epoch):
        """

        Args:
            episodes: List of completed episodes (Agent/Trainer-dependent).
            epoch (int): epoch number
        """
        time_stamp = time.time()
        n_epoch_states, n_epoch_transitions = self._update(episodes)
        metric_logging.log_scalar_metrics(
            'run_time', epoch,
            {'update_experienced_states': time.time() - time_stamp})

        metric_logging.log_scalar_metrics(
            'exploration', epoch, {
                'total_experienced_states_in_history':
                len(self._experienced_states),
                'total_experienced_transitions_in_history':
                len(self._experienced_transitions),
                'total_experienced_states_in_epoch':
                n_epoch_states,
                'total_experienced_transitions_in_epoch':
                n_epoch_transitions,
            })
Example #14
    def run_epoch(self):
        start_time = time.time()
        episodes = self._batch_stepper.run_episode_batch(
            self._network.params,
            epoch=max(0, self._epoch - self._n_precollect_epochs),
            time_limit=self._episode_time_limit)
        episode_metrics = {
            'count': self._total_episodes,
            'time': time.time() - start_time,
        }
        episode_metrics.update(metric_utils.compute_episode_metrics(episodes))
        self._total_episodes += len(episodes)
        metric_logging.log_scalar_metrics('episode', self._epoch,
                                          episode_metrics,
                                          self._metric_smoothing)
        metric_logging.log_scalar_metrics(
            'agent', self._epoch, self._agent_class.compute_metrics(episodes),
            self._metric_smoothing)

        for episode in episodes:
            self._trainer.add_episode(episode)

        if self._epoch >= self._n_precollect_epochs:
            metrics = self._trainer.train_epoch(self._network)
            metric_logging.log_scalar_metrics('train', self._epoch, metrics,
                                              self._metric_smoothing)

        for callback in self._callbacks:
            callback.on_epoch_end(self._epoch, self._network.params)

        if self._epoch == self._n_precollect_epochs:
            self._save_gin()

        self._epoch += 1

        if (time.time() - self._last_save_time > 3600
                or self._epoch == self._n_epochs):
            self.save()
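
The save condition at the end checkpoints at most once per hour of wall-clock time and unconditionally after the last epoch (note that self._epoch has already been incremented by then). Extracted as a standalone predicate with illustrative argument names:

import time

# Sketch of the checkpoint trigger used above; names are illustrative.
def should_save(last_save_time, epoch, n_epochs):
    hour_elapsed = time.time() - last_save_time > 3600  # seconds
    return hour_elapsed or epoch == n_epochs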
Example #15
    def _generate_random_episodes(self):
        log_every_n_epochs = max(1, self._n_model_precollect_epochs // 10)

        random_episodes_metrics_helper = {
            'return_total': 0,
            'length_total': 0,
            'solved': 0,
            'count': 0
        }

        for i in range(self._n_model_precollect_epochs):
            episodes = self._random_episodes_batch_stepper.run_episode_batch(
                agent_params=None, model_params=None, epoch=0,
                time_limit=self._episode_time_limit
            )
            # Calculate metrics and update metrics helper.
            episodes_batch_metrics = self._compute_episode_metrics(episodes)
            random_episodes_metrics_helper['count'] += len(episodes)
            random_episodes_metrics_helper['return_total'] += \
                episodes_batch_metrics['return_mean'] * len(episodes)
            random_episodes_metrics_helper['length_total'] += \
                episodes_batch_metrics['length_mean'] * len(episodes)
            random_episodes_metrics_helper['solved'] += \
                episodes_batch_metrics['solved_rate'] * len(episodes)

            # Add episodes to the replay buffer.
            for episode in episodes:
                self._model_trainer.add_episode(
                    episode, {'random_episode': True}
                )

            if i % log_every_n_epochs == log_every_n_epochs - 1:
                metric_logging.log_scalar(
                    'random_episodes/generated',
                    step=i,
                    value=i / self._n_model_precollect_epochs
                )

        metric_logging.log_scalar(
            'random_episodes/generated',
            step=self._n_model_precollect_epochs,
            value=1.0
        )

        random_episodes_metrics = {
            'return_mean':
                random_episodes_metrics_helper['return_total'] /
                random_episodes_metrics_helper['count'],
            'length_mean':
                random_episodes_metrics_helper['length_total'] /
                random_episodes_metrics_helper['count'],
            'solved_rate':
                random_episodes_metrics_helper['solved'] /
                random_episodes_metrics_helper['count'],
            'count': random_episodes_metrics_helper['count'],
        }
        metric_logging.log_scalar_metrics(
            'random_episodes',
            self._epoch,
            random_episodes_metrics
        )
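
The metrics helper in this example recovers overall means by accumulating each batch's mean weighted by its episode count and dividing by the total count at the end. The same aggregation, as a standalone sketch with illustrative names:

# Illustrative sketch of the weighted-mean aggregation used above.
def aggregate_means(batches):
    """batches: iterable of (batch_mean, batch_size) pairs."""
    total, count = 0.0, 0
    for batch_mean, batch_size in batches:
        total += batch_mean * batch_size
        count += batch_size
    return total / count

# For example: aggregate_means([(2.0, 10), (4.0, 30)]) == 3.5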