Example 1
 def on_episode_end(self):
     """Called in the end of an episode."""
     if not self._log_current_episode:
         return
     # WARNING: self.epoch is the same for many steps/episodes, this might
     # need rewriting in the future.
     metric_logging.log_scalar('episode/graph_size', self._epoch,
                               len(self._episode_observations))
Example 2
 def on_real_step(self, agent_info, action, observation, reward, done):
     """Called after every step in the real environment."""
     if not self._log_current_episode:
         return
     # WARNING: self.epoch is the same for many steps/episodes, this might
     # need rewriting in the future.
     metric_logging.log_scalar('step/graph_size', self._epoch,
                               len(self._step_observations))
     observation = self._preprocess_observation(observation)
     self._episode_observations.add(observation)
     self._step_observations = {observation}
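Storing observations in Python sets, as in the example above, requires them to be hashable, which raw numpy arrays are not. A minimal sketch of what a preprocessing step could do under that assumption (the conversion below is illustrative, not the project's confirmed implementation):

import numpy as np

def preprocess_observation(observation):
    # Hypothetical: flatten the observation into an immutable, hashable key
    # so it can be stored in the sets used above.
    return tuple(np.asarray(observation).flatten().tolist())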
Example 3
    def _log_episode_metrics(self, episodes):
        return_mean = sum(episode.return_
                          for episode in episodes) / len(episodes)
        metric_logging.log_scalar('return_mean', self._epoch, return_mean)

        solved_list = [
            int(episode.solved) for episode in episodes
            if episode.solved is not None
        ]
        if solved_list:
            solved_rate = sum(solved_list) / len(solved_list)
            metric_logging.log_scalar('solved_rate', self._epoch, solved_rate)
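For reference, a toy batch showing how the aggregation above behaves; the Episode container here is a stand-in holding only the two fields the method reads:

from collections import namedtuple

Episode = namedtuple('Episode', ['return_', 'solved'])

episodes = [
    Episode(return_=1.0, solved=True),
    Episode(return_=0.0, solved=False),
    Episode(return_=0.5, solved=None),  # skipped when computing solved_rate
]

return_mean = sum(e.return_ for e in episodes) / len(episodes)  # 0.5
solved_list = [int(e.solved) for e in episodes if e.solved is not None]
solved_rate = sum(solved_list) / len(solved_list)  # 0.5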
Example 4
    def solve(self, env, epoch=None, init_state=None, time_limit=None):
        del env
        del init_state
        del time_limit
        for attr_name, schedule in self._parameter_schedules.items():
            param_value = schedule(epoch)
            utils.recursive_setattr(self, attr_name, param_value)
            metric_logging.log_scalar('agent_param/' + attr_name, epoch,
                                      param_value)

        # The unreachable yield below makes this method an (empty) generator,
        # so callers that iterate over solve() still get a generator.
        return
        yield
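Each entry of self._parameter_schedules is expected to be a callable mapping the epoch number to a parameter value. A minimal sketch of such a schedule, assuming a simple linear annealing (the class name and arguments are illustrative, not taken from the project):

class LinearAnnealing:
    """Hypothetical schedule: anneals a value linearly over n_epochs epochs."""

    def __init__(self, max_value, min_value, n_epochs):
        self._max_value = max_value
        self._min_value = min_value
        self._n_epochs = n_epochs

    def __call__(self, epoch):
        # Clamp so the value stays at min_value after the annealing window.
        frac = min(epoch / self._n_epochs, 1.0)
        return self._max_value + (self._min_value - self._max_value) * frac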
Example 5
    def run_epoch(self):
        """Runs a single epoch."""
        metric_logging.log_scalar(
            'epoch',
            self._epoch,
            self._epoch,
        )
        episodes = self._run_episode_batch()
        self._total_episodes += len(episodes)
        self._log_episodes_metrics_and_images(episodes)

        for episode in episodes:
            self._trainer.add_episode(episode)

        if self._fine_tune_on_agent_data:
            for episode in episodes:
                self._model_trainer.add_episode(
                    episode, {'random_episode': False}
                )

        if (self._agent_reset_schedule is not None and
                self._epoch in self._agent_reset_schedule):
            self._network.reset()
            self._trainer.clear_experience()
            self._last_agent_reset = self._epoch

        if self._epoch >= self._last_agent_reset + self._n_precollect_epochs:
            self._run_agent_train_epoch()

        if self._epoch >= self._n_precollect_epochs:
            # We start training the model only after precollecting data with
            # the agent as well (i.e. we do not start training after just one
            # batch of episodes; instead we wait n_precollect_epochs).
            if self._fine_tune_on_agent_data:
                self._run_model_train_epoch('model_fine_tune', self._epoch)

        if self._epoch == self._n_precollect_epochs:
            # Save gin operative config into a file. "Operative" means the part
            # that is actually used in the experiment. We need to run a full
            # epoch (data collection + training) first, so gin can figure that
            # out.
            self._save_gin()

        self._epoch += 1
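The three epoch comparisons above can be read off with a small standalone sketch; n_precollect_epochs = 3 and the absence of agent resets are made-up values for illustration:

n_precollect_epochs = 3
last_agent_reset = 0

for epoch in range(6):
    trains_agent = epoch >= last_agent_reset + n_precollect_epochs
    fine_tunes_model = epoch >= n_precollect_epochs
    saves_gin = epoch == n_precollect_epochs
    print(epoch, trains_agent, fine_tunes_model, saves_gin)
# Epochs 0-2 only collect episodes; epoch 3 is the first training epoch and
# the one at which the gin config is saved; a later agent reset pushes the
# agent-training threshold forward by n_precollect_epochs again.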
Example 6
 def __init__(self, env):
     assert hasattr(env, 'obs2state')
     time_stamp = time.time()
     self._graph_env = env
     self._graph, goal_states = \
         generate_env_state_space_graph_and_goal_states(self._graph_env)
     self._graph_env.reset()
     start_state = env.clone_state()
     self._distance_from_start = self._graph.calc_distances_from(
         start_state)
     self._distance_to_goal = calc_distance_to_closest_goal(
         goal_states, self._graph)
     self._min_distance_to_goal = float('inf')
     self._max_distance_from_start = 0
     metric_logging.log_scalar(
         'graph/size',
         step=0,
         value=len(self._graph.edges.keys()
                   | self._graph.edges_transposed.keys()))
     metric_logging.log_scalar('run_time/graph_calculation',
                               step=0,
                               value=time.time() - time_stamp)
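The graph helpers used above (calc_distances_from, calc_distance_to_closest_goal) are not shown in this example. Assuming the graph stores adjacency as a dict mapping each state to its successor states, a breadth-first search over unit-cost edges is enough to compute such distances; a minimal sketch (the function below is a stand-in, not the project's implementation):

import collections

def bfs_distances(edges, source):
    # Shortest path lengths (in number of edges) from source.
    distances = {source: 0}
    queue = collections.deque([source])
    while queue:
        state = queue.popleft()
        for neighbor in edges.get(state, ()):
            if neighbor not in distances:
                distances[neighbor] = distances[state] + 1
                queue.append(neighbor)
    return distances

Distance to the closest goal can be obtained the same way by searching from the goal states over edges_transposed and taking, for every state, the minimum over goals.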
Example 7
 def _log_training_metrics(self, metrics):
     for (name, value) in metrics.items():
         metric_logging.log_scalar('train_raw/' + name, self._epoch, value)
Example 8
    def _generate_random_episodes(self):
        log_every_n_epochs = max(1, self._n_model_precollect_epochs // 10)

        random_episodes_metrics_helper = {
            'return_total': 0,
            'length_total': 0,
            'solved': 0,
            'count': 0
        }

        for i in range(self._n_model_precollect_epochs):
            episodes = self._random_episodes_batch_stepper.run_episode_batch(
                agent_params=None, model_params=None, epoch=0,
                time_limit=self._episode_time_limit
            )
            # Calculate metrics and update metrics helper.
            episodes_batch_metrics = self._compute_episode_metrics(episodes)
            random_episodes_metrics_helper['count'] += len(episodes)
            random_episodes_metrics_helper['return_total'] += \
                episodes_batch_metrics['return_mean'] * len(episodes)
            random_episodes_metrics_helper['length_total'] += \
                episodes_batch_metrics['length_mean'] * len(episodes)
            random_episodes_metrics_helper['solved'] += \
                episodes_batch_metrics['solved_rate'] * len(episodes)

            # Add episodes to the replay buffer.
            for episode in episodes:
                self._model_trainer.add_episode(
                    episode, {'random_episode': True}
                )

            if i % log_every_n_epochs == log_every_n_epochs - 1:
                metric_logging.log_scalar(
                    'random_episodes/generated',
                    step=i,
                    value=i / self._n_model_precollect_epochs
                )

        metric_logging.log_scalar(
            'random_episodes/generated',
            step=self._n_model_precollect_epochs,
            value=1.0
        )

        random_episodes_metrics = {
            'return_mean':
                random_episodes_metrics_helper['return_total'] /
                random_episodes_metrics_helper['count'],
            'length_mean':
                random_episodes_metrics_helper['length_total'] /
                random_episodes_metrics_helper['count'],
            'solved_rate':
                random_episodes_metrics_helper['solved'] /
                random_episodes_metrics_helper['count'],
            'count': random_episodes_metrics_helper['count'],
        }
        metric_logging.log_scalar_metrics(
            'random_episodes',
            self._epoch,
            random_episodes_metrics
        )
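All of the examples above report through the same metric_logging.log_scalar(name, step, value) entry point, plus log_scalar_metrics for dicts of scalars. A minimal sketch of a backend such a module could dispatch to; the StdoutLogger class below is an assumption for illustration, not the library's confirmed API:

class StdoutLogger:
    """Hypothetical backend: prints every reported scalar to standard output."""

    def log_scalar(self, name, step, value):
        print(f'{step:>6} | {name:<32} | {value:.4f}')

    def log_scalar_metrics(self, prefix, step, metrics):
        # Dicts of scalars are flattened into individual log_scalar calls.
        for name, value in metrics.items():
            self.log_scalar(prefix + '/' + name, step, value)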