def _print_train_log(self, scores, overall_start_time, last_log_time, steps):
    """Log a one-line training summary: score stats and step throughput.

    Args:
        scores: episode scores accumulated since the last log call.
        overall_start_time: time.time() timestamp of training start.
        last_log_time: time.time() timestamp of the previous log call;
            used to compute the local steps/sec rate.
        steps: number of local steps taken since last_log_time.
    """
    current_time = time.time()
    mean_score = np.mean(scores)
    score_std = np.std(scores)
    min_score = np.min(scores)
    max_score = np.max(scores)
    # Fix: reuse current_time so elapsed_time and local_steps_per_sec are
    # measured against the same instant (original called time.time() twice).
    elapsed_time = current_time - overall_start_time
    global_steps = self._global_steps_counter.get()
    local_steps_per_sec = steps / (current_time - last_log_time)
    global_steps_per_sec = global_steps / elapsed_time
    global_mil_steps_per_hour = global_steps_per_sec * 3600 / 1000000.0
    log(
        "TRAIN: {}(GlobalSteps), {} episodes, mean: {}, min: {}, max: {}, "
        "\nLocalSpd: {:.0f} STEPS/s GlobalSpd: "
        "{} STEPS/s, {:.2f}M STEPS/hour, total elapsed time: {}".format(
            global_steps,
            len(scores),
            green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
            red("{:0.3f}".format(min_score)),
            blue("{:0.3f}".format(max_score)),
            local_steps_per_sec,
            blue("{:.0f}".format(global_steps_per_sec)),
            global_mil_steps_per_hour,
            sec_to_str(elapsed_time)))
def test(self, episodes_num=None, deterministic=True):
    """Run evaluation episodes and log aggregate score statistics.

    Args:
        episodes_num: how many episodes to run; when None, falls back to
            self.test_episodes_per_epoch.
        deterministic: forwarded to run_episode.

    Returns:
        Tuple (test_rewards, test_actions, test_frameskips) where rewards
        is a list of per-episode totals and the other two are the
        concatenated per-step stats of all episodes.
    """
    if episodes_num is None:
        episodes_num = self.test_episodes_per_epoch

    start_time = time.time()
    rewards = []
    all_actions = []
    all_frameskips = []

    progress = trange(episodes_num, desc="Testing", file=sys.stdout,
                      leave=False, disable=not self.enable_progress_bar)
    for _ in progress:
        total_reward, actions, frameskips, _ = self.run_episode(
            deterministic=deterministic, return_stats=True)
        rewards.append(total_reward)
        all_actions.extend(actions)
        all_frameskips.extend(frameskips)
        # Fresh environment (and recurrent state, if any) for the next episode.
        self.doom_wrapper.reset()
        if self.local_network.has_state():
            self.local_network.reset_state()

    duration = time.time() - start_time
    log(
        "TEST: mean: {}, min: {}, max: {}, test time: {}".format(
            green("{:0.3f}±{:0.2f}".format(np.mean(rewards), np.std(rewards))),
            red("{:0.3f}".format(np.min(rewards))),
            blue("{:0.3f}".format(np.max(rewards))),
            sec_to_str(duration)))
    return rewards, all_actions, all_frameskips
def run(self):
    """Worker training loop with periodic target-network unfreezing.

    Runs make_training_step() until self._epochs epochs are complete.
    Every self.frozen_global_steps global steps the designated unfreeze
    thread runs the global network's unfreeze op. Thread 0 additionally
    logs training stats, optionally tests, writes summaries and saves
    the model at each epoch boundary.

    NOTE(review): indentation reconstructed from a flattened source; the
    nesting below is the most plausible reading -- verify against history.
    """
    # TODO this method is ugly, make it nicer ...and it's the same as above.... really TODO!!
    # Basically code copied from base class with unfreezing
    try:
        overall_start_time = time.time()
        last_log_time = overall_start_time
        local_steps_for_log = 0
        # Global-step threshold at which the target network is next unfrozen.
        next_target_update = self.frozen_global_steps
        while self._epoch <= self._epochs:
            steps = self.make_training_step()
            local_steps_for_log += steps
            global_steps = self._global_steps_counter.inc(steps)

            # Updating target network:
            if self.unfreeze_thread:  # TODO this check is dangerous
                if global_steps >= next_target_update:
                    next_target_update += self.frozen_global_steps
                    if next_target_update <= global_steps:
                        # Other threads advanced the shared counter past a
                        # whole update interval before this thread noticed.
                        # TODO use warn from the logger
                        logging.warning(yellow(
                            "Global steps ({}) <= next target update ({}).".format(
                                global_steps, next_target_update)))
                    self._session.run(self.global_network.ops.unfreeze)

            # Logs & tests
            if self.local_steps_per_epoch * self._epoch <= self.local_steps:
                self._epoch += 1
                if self.thread_index == 0:
                    self._print_train_log(self.train_scores, overall_start_time,
                                          last_log_time, local_steps_for_log)
                    if self._run_tests:
                        test_scores, actions, frameskips = self.test(
                            deterministic=self.deterministic_testing)
                    if self.write_summaries:
                        train_summary = self._session.run(
                            self._summaries,
                            {self.scores_placeholder: self.train_scores})
                        self._train_writer.add_summary(train_summary, global_steps)
                        if self._run_tests:
                            test_summary = self._session.run(
                                self._summaries,
                                {self.scores_placeholder: test_scores})
                            self._test_writer.add_summary(test_summary, global_steps)
                    last_log_time = time.time()
                    local_steps_for_log = 0
                    log("Learning rate: {}".format(
                        self._session.run(self.learning_rate)))
                    # Saves model
                    if self._epoch % self.save_interval == 0:
                        self.save_model()
                    log("")
                # Per-epoch stats reset.
                self.train_scores = []
                self.train_actions = []
                self.train_frameskips = []
    except (SignalException, ViZDoomUnexpectedExitException):
        threadsafe_print(red("Thread #{} aborting(ViZDoom killed).".format(
            self.thread_index)))
def run(self):
    """Basic worker training loop: train until self._epochs epochs are
    done; thread 0 logs stats, optionally tests, writes summaries and
    saves the model at each epoch boundary.

    NOTE(review): indentation reconstructed from a flattened source.
    NOTE(review): `test_scores = self.test(...)` keeps the raw return
    value; if this class resolves to the test() that returns a
    (rewards, actions, frameskips) tuple, the summary feed below would
    receive a tuple instead of a score list -- confirm which test()
    this method actually calls.
    """
    # TODO this method is ugly, make it nicer
    try:
        overall_start_time = time.time()
        last_log_time = overall_start_time
        local_steps_for_log = 0
        while self._epoch <= self._epochs:
            steps = self.make_training_step()
            local_steps_for_log += steps
            global_steps = self._global_steps_counter.inc(steps)

            # Logs & tests
            if self.local_steps_per_epoch * self._epoch <= self.local_steps:
                self._epoch += 1
                if self.thread_index == 0:
                    self._print_train_log(self.train_scores, overall_start_time,
                                          last_log_time, local_steps_for_log)
                    if self._run_tests:
                        test_scores = self.test(
                            deterministic=self.deterministic_testing)
                    if self.write_summaries:
                        train_summary = self._session.run(
                            self._summaries,
                            {self.scores_placeholder: self.train_scores})
                        self._train_writer.add_summary(
                            train_summary, global_steps)
                        if self._run_tests:
                            test_summary = self._session.run(
                                self._summaries,
                                {self.scores_placeholder: test_scores})
                            self._test_writer.add_summary(
                                test_summary, global_steps)
                    last_log_time = time.time()
                    local_steps_for_log = 0
                    log("Learning rate: {}".format(
                        self._session.run(self.learning_rate)))
                    # Saves model
                    if self._epoch % self.save_interval == 0:
                        self.save_model()
                    log("")
                # Per-epoch score reset.
                self.train_scores = []
    except (SignalException, ViZDoomUnexpectedExitException):
        threadsafe_print(
            red("Thread #{} aborting(ViZDoom killed).".format(
                self.thread_index)))
def print_epoch_log(prefix, scores, steps, epoch_time):
    """Log a one-line epoch summary: episode count, score stats, speed.

    Args:
        prefix: label prepended to the line (e.g. a phase name).
        scores: sequence of per-episode scores for the epoch.
        steps: total environment steps taken during the epoch.
        epoch_time: wall-clock duration of the epoch, in seconds.
    """
    rate = steps / epoch_time
    log("{}: Episodes: {}, mean: {}, min: {}, max: {}, "
        " Speed: {:.0f} STEPS/s, {:.2f}M STEPS/hour, time: {}".format(
            prefix,
            len(scores),
            green("{:0.3f}±{:0.2f}".format(np.mean(scores), np.std(scores))),
            red("{:0.3f}".format(np.min(scores))),
            blue("{:0.3f}".format(np.max(scores))),
            rate,
            rate * 3600 / 1000000.0,
            sec_to_str(epoch_time)))
def run(self):
    """Worker training loop with per-epoch heatmap logging and a
    configurable test cadence (tests run every self.test_interval epochs).

    Thread 0 logs epoch stats, optionally tests, writes score/action/
    frameskip summaries, saves the model, and prints action heatmaps.

    NOTE(review): indentation reconstructed from a flattened source; the
    nesting below is the most plausible reading -- verify against history.
    """
    # TODO this method is ugly, make it nicer
    try:
        overall_start_time = time.time()
        last_log_time = overall_start_time
        local_steps_for_log = 0
        while self._epoch <= self._epochs:
            steps = self.make_training_step()
            local_steps_for_log += steps
            global_steps = self._global_steps_counter.inc(steps)

            # Logs & tests
            if self.local_steps_per_epoch * self._epoch <= self.local_steps:
                self._epoch += 1
                if self.thread_index == 0:
                    log("EPOCH {}".format(self._epoch - 1))
                    self._print_train_log(
                        self.train_scores, overall_start_time,
                        last_log_time, local_steps_for_log)
                    # Only every test_interval-th epoch runs a test pass.
                    run_test_this_epoch = ((self._epoch - 1) % self.test_interval) == 0
                    if self._run_tests and run_test_this_epoch:
                        test_scores, test_actions, test_frameskips = self.test(
                            deterministic=self.deterministic_testing)
                    if self.write_summaries:
                        train_summary = self._session.run(
                            self._summaries,
                            {self.scores_placeholder: self.train_scores,
                             self.actions_placeholder: self.train_actions,
                             self.frameskips_placeholder: self.train_frameskips})
                        self._train_writer.add_summary(train_summary, global_steps)
                        if self._run_tests and run_test_this_epoch:
                            test_summary = self._session.run(
                                self._summaries,
                                {self.scores_placeholder: test_scores,
                                 self.actions_placeholder: test_actions,
                                 self.frameskips_placeholder: test_frameskips})
                            self._test_writer.add_summary(test_summary, global_steps)
                    last_log_time = time.time()
                    local_steps_for_log = 0
                    log("Learning rate: {}".format(
                        self._session.run(self.learning_rate)))
                    # Saves model
                    if self._epoch % self.save_interval == 0:
                        self.save_model()
                    now = datetime.datetime.now()
                    log("Time: {:2d}:{:02d}".format(now.hour, now.minute))
                    if self.show_heatmaps:
                        log("Train heatmaps:")
                        log(self.heatmap(self.train_actions, self.train_frameskips))
                        log("")
                        if run_test_this_epoch:
                            log("Test heatmaps:")
                            log(self.heatmap(test_actions, test_frameskips))
                            log("")
                # Per-epoch stats reset.
                self.train_scores = []
                self.train_actions = []
                self.train_frameskips = []
        threadsafe_print("Thread {} finished.".format(self.thread_index))
    except (SignalException, ViZDoomUnexpectedExitException):
        threadsafe_print(red("Thread #{} aborting(ViZDoom killed).".format(
            self.thread_index)))