def _log_graph_distance_metrics(self, epoch):
    """Log the running min/max graph-distance statistics for *epoch*."""
    distance_metrics = {
        'max_distance_from_start_in_history':
            self._max_distance_from_start,
        'min_distance_to_goal_in_history': self._min_distance_to_goal,
    }
    metric_logging.log_scalar_metrics('graph', epoch, distance_metrics)
def update_and_log(self, episodes, epoch):
    """Update running min/max graph-distance stats from episodes and log them.

    Args:
        episodes: List of completed episodes (Agent/Trainer-dependent).
        epoch (int): epoch number
    """
    time_stamp = time.time()
    episode_min_goal_distance = []
    episode_max_start_distance = []
    for episode in episodes:
        # Include the last next_observation so the terminal state is
        # taken into account as well.
        observations = (
            list(episode.transition_batch.observation)
            + [episode.transition_batch.next_observation[-1]]
        )
        states = [self._graph_env.obs2state(obs) for obs in observations]
        # States missing from the goal-distance map count as unreachable.
        distances_to_goal = [
            self._distance_to_goal.get(state, float('inf'))
            for state in states
        ]
        # NOTE(review): direct indexing assumes every visited state is
        # present in self._distance_from_start (a missing state raises
        # KeyError here, unlike the .get() fallback above) — confirm
        # this asymmetry is intended.
        distances_from_start = [
            self._distance_from_start[state] for state in states
        ]
        episode_min_goal_distance.append(min(distances_to_goal))
        episode_max_start_distance.append(max(distances_from_start))
    self._max_distance_from_start = max(
        self._max_distance_from_start, max(episode_max_start_distance))
    self._min_distance_to_goal = min(
        self._min_distance_to_goal, min(episode_min_goal_distance))
    self._log_graph_distance_metrics(epoch)
    metric_logging.log_scalar_metrics(
        'run_time', epoch,
        {'update_and_log_graph_distances': time.time() - time_stamp})
def _run_agent_train_epoch(self):
    """Train the agent network for one epoch, logging timing and metrics."""
    start = time.time()
    train_metrics = self._trainer.train_epoch(self._network)
    elapsed = time.time() - start
    metric_logging.log_scalar_metrics(
        'run_time', self._epoch, {'agent_train': elapsed})
    metric_logging.log_scalar_metrics(
        'agent_train', self._epoch, train_metrics)
def _maybe_log_single_episode_visualization(self, episodes):
    """Log a single-episode rollout visualization on scheduled epochs."""
    if self._log_rollout_every_n_epochs is None:
        return
    if self._epoch % self._log_rollout_every_n_epochs != 0:
        return
    start = time.time()
    self._log_single_episode_visualization(episodes)
    metric_logging.log_scalar_metrics(
        'run_time', self._epoch,
        {'single_rollout': time.time() - start})
def _maybe_log_transitions_visualizations(self, episodes):
    """Log transition-prediction visualizations on scheduled epochs."""
    if self._log_other_images_every_n_epochs is None:
        return
    if self._epoch % self._log_other_images_every_n_epochs != 0:
        return
    start = time.time()
    self._log_hard_transitions_predictions_visualizations(episodes)
    self._log_top_priority_transitions()
    metric_logging.log_scalar_metrics(
        'run_time', self._epoch,
        {'other_images': time.time() - start})
def _log_episodes_metrics_and_images(self, episodes):
    """Log all per-epoch scalar metrics, then the optional visualizations."""
    # Scalar metric groups, logged in a fixed order.
    scalar_metric_sources = (
        ('agent', self._agent_class.compute_metrics),
        ('env', self._logging_env.compute_metrics),
        ('episode', self._compute_episode_metrics),
        ('episode_rolling', self._compute_metrics_across_epochs),
    )
    for prefix, compute_metrics in scalar_metric_sources:
        metric_logging.log_scalar_metrics(
            prefix, self._epoch, compute_metrics(episodes))
    # Conditional loggers decide internally whether this epoch applies.
    self._maybe_log_n_experienced_states(episodes)
    self._maybe_update_and_log_graph_distance_metrics(episodes)
    self._maybe_log_trainable_env_model_metrics_and_images(episodes)
    self._maybe_log_single_episode_visualization(episodes)
    self._maybe_log_agent_visit_heat_map(episodes)
    self._maybe_log_transitions_visualizations(episodes)
def _maybe_log_agent_visit_heat_map(self, episodes):
    """Log the agent visit heat map on scheduled epochs."""
    if self._log_heat_map_every_n_epochs is None:
        return
    if self._epoch % self._log_heat_map_every_n_epochs != 0:
        return
    start = time.time()
    self._log_agent_visit_heat_map(episodes, self._log_detailed_heat_map)
    metric_logging.log_scalar_metrics(
        'run_time', self._epoch,
        {'heat_map': time.time() - start})
def _run_model_train_epoch(self, train_phase, train_epoch):
    """Train the env model for one epoch, logging timing and metrics.

    Args:
        train_phase: Metric prefix under which to log the training metrics
            (also used as the run_time key).
        train_epoch: Epoch number used as the logging step.
    """
    start = time.time()
    model_metrics = self._model_trainer.train_epoch(self._model_network)
    elapsed = time.time() - start
    metric_logging.log_scalar_metrics(
        'run_time', train_epoch, {train_phase: elapsed})
    metric_logging.log_scalar_metrics(
        train_phase, train_epoch, model_metrics)
def _maybe_log_trainable_env_model_metrics_and_images(self, episodes):
    """Log trainable-model metrics, and optionally false-positive images."""
    if self._model_network is None:
        return
    model_metrics = self._compute_trainable_model_metrics(episodes)
    metric_logging.log_scalar_metrics(
        'episode_model', self._epoch, model_metrics)
    if self._log_false_positive_images:
        start = time.time()
        self._log_transitions_with_false_positive_rewards(episodes)
        metric_logging.log_scalar_metrics(
            'run_time', self._epoch,
            {'false_positive_images': time.time() - start})
def _run_episode_batch(self):
    """Collect one batch of episodes, log the run time and return them."""
    start = time.time()
    model_params = (
        self._model_network.params
        if self._model_network is not None else None
    )
    episodes = self._batch_stepper.run_episode_batch(
        self._network.params,
        model_params,
        epoch=max(0, self._epoch),
        time_limit=self._episode_time_limit
    )
    metric_logging.log_scalar_metrics(
        'run_time', self._epoch,
        {'episode_batch': time.time() - start})
    return episodes
def hard_eval_run(self, env):
    """Run an endless hard-evaluation loop on ``env``, logging solve stats.

    Generator-based coroutine: each episode is produced by delegating to
    ``self.solve`` via ``yield from``. The loop never terminates on its
    own; the caller is expected to stop iterating it.
    """
    print('Hard eval started')
    # NOTE(review): mutates the wrapped environment in place — presumably
    # a cube-style env exposing scrambleSize/step_limit; confirm these
    # attribute names against the env implementation.
    env.env.scrambleSize = 100
    env.env.step_limit = 150
    self._n_passes = 8000
    count = 0
    solved = 0
    while True:
        beg_time = time.time()
        episode = yield from self.solve(env, count, dummy=True, hard=True)
        run_time = time.time() - beg_time
        solved += int(episode.solved)
        count += 1
        metric_logging.log_scalar_metrics(
            'hard_eval', count, {
                # Running solve rate over all hard-eval episodes so far.
                'solved': solved / count,
                'solving time': run_time,
                # NOTE(review): assumes episode.return_ encodes the step
                # count as 1 - steps; verify against the Episode class.
                'steps': 1 - episode.return_
            })
def run_epoch(self): """Runs a single epoch.""" episodes = self._batch_stepper.run_episode_batch( self._network.params, epoch=max(0, self._epoch - self._n_precollect_epochs), time_limit=self._episode_time_limit ) self._total_episodes += len(episodes) metric_logging.log_scalar_metrics( 'episode', self._epoch, self._compute_episode_metrics(episodes) ) metric_logging.log_scalar_metrics( 'agent', self._epoch, self._agent_class.compute_metrics(episodes) ) new_time_stamp = time.time() time_diff = new_time_stamp - self.time_stamp self.time_stamp = new_time_stamp metric_logging.log_scalar_metrics( 'agent', self._epoch, {'time': time_diff} ) for episode in episodes: self._trainer.add_episode(episode) if self._epoch >= self._n_precollect_epochs: metrics = self._trainer.train_epoch(self._network) metric_logging.log_scalar_metrics( 'train', self._epoch, metrics ) if self._epoch == self._n_precollect_epochs: # Save gin operative config into a file. "Operative" means the part # that is actually used in the experiment. We need to run an full # epoch (data collection + training) first, so gin can figure that # out. self._save_gin() self._epoch += 1 print('')
def update_and_log(self, episodes, epoch):
    """Update experienced-state/transition counters and log them.

    Args:
        episodes: List of completed episodes (Agent/Trainer-dependent).
        epoch (int): epoch number
    """
    start = time.time()
    n_epoch_states, n_epoch_transitions = self._update(episodes)
    metric_logging.log_scalar_metrics(
        'run_time', epoch,
        {'update_experienced_states': time.time() - start})
    exploration_metrics = {
        'total_experienced_states_in_history':
            len(self._experienced_states),
        'total_experienced_transitions_in_history':
            len(self._experienced_transitions),
        'total_experienced_states_in_epoch': n_epoch_states,
        'total_experienced_transitions_in_epoch': n_epoch_transitions,
    }
    metric_logging.log_scalar_metrics(
        'exploration', epoch, exploration_metrics)
def run_epoch(self):
    """Collect one episode batch, train, fire callbacks and checkpoint."""
    batch_start = time.time()
    episodes = self._batch_stepper.run_episode_batch(
        self._network.params,
        epoch=max(0, self._epoch - self._n_precollect_epochs),
        time_limit=self._episode_time_limit)
    # 'count' is the number of episodes collected *before* this batch.
    episode_metrics = {
        'count': self._total_episodes,
        'time': time.time() - batch_start,
    }
    episode_metrics.update(metric_utils.compute_episode_metrics(episodes))
    self._total_episodes += len(episodes)
    metric_logging.log_scalar_metrics(
        'episode', self._epoch, episode_metrics, self._metric_smoothing)
    metric_logging.log_scalar_metrics(
        'agent', self._epoch, self._agent_class.compute_metrics(episodes),
        self._metric_smoothing)
    for episode in episodes:
        self._trainer.add_episode(episode)
    # Training only starts once enough data has been precollected.
    if self._epoch >= self._n_precollect_epochs:
        train_metrics = self._trainer.train_epoch(self._network)
        metric_logging.log_scalar_metrics(
            'train', self._epoch, train_metrics, self._metric_smoothing)
    for callback in self._callbacks:
        callback.on_epoch_end(self._epoch, self._network.params)
    if self._epoch == self._n_precollect_epochs:
        self._save_gin()
    self._epoch += 1
    # Checkpoint at most once an hour, and always on the final epoch.
    save_overdue = time.time() - self._last_save_time > 3600
    if save_overdue or self._epoch == self._n_epochs:
        self.save()
def _generate_random_episodes(self):
    """Precollect random episodes into the model trainer's replay buffer.

    Runs ``self._n_model_precollect_epochs`` batches of random episodes,
    aggregates their metrics (weighted by episode count), logs generation
    progress along the way and the aggregate metrics at the end.
    """
    log_every_n_epochs = max(1, self._n_model_precollect_epochs // 10)
    # Accumulate weighted sums so means are computed over all episodes,
    # not averaged per batch.
    total_count = 0
    return_total = 0.0
    length_total = 0.0
    solved_total = 0.0
    for i in range(self._n_model_precollect_epochs):
        episodes = self._random_episodes_batch_stepper.run_episode_batch(
            agent_params=None,
            model_params=None,
            epoch=0,
            time_limit=self._episode_time_limit
        )
        episodes_batch_metrics = self._compute_episode_metrics(episodes)
        n_episodes = len(episodes)
        total_count += n_episodes
        return_total += episodes_batch_metrics['return_mean'] * n_episodes
        length_total += episodes_batch_metrics['length_mean'] * n_episodes
        solved_total += episodes_batch_metrics['solved_rate'] * n_episodes
        # Tag the episodes so the trainer can tell them apart from
        # agent-generated ones.
        for episode in episodes:
            self._model_trainer.add_episode(
                episode, {'random_episode': True}
            )
        if i % log_every_n_epochs == log_every_n_epochs - 1:
            metric_logging.log_scalar(
                'random_episodes/generated', step=i,
                value=i / self._n_model_precollect_epochs
            )
    metric_logging.log_scalar(
        'random_episodes/generated',
        step=self._n_model_precollect_epochs, value=1.0
    )
    # Guard: with zero precollect epochs (or only empty batches) there is
    # nothing to aggregate — the unguarded division would raise
    # ZeroDivisionError.
    if total_count == 0:
        return
    random_episodes_metrics = {
        'return_mean': return_total / total_count,
        'length_mean': length_total / total_count,
        'solved_rate': solved_total / total_count,
        'count': total_count,
    }
    metric_logging.log_scalar_metrics(
        'random_episodes', self._epoch, random_episodes_metrics
    )