def _print_train_log(self, scores, overall_start_time, last_log_time, steps):
    current_time = time.time()
    mean_score = np.mean(scores)
    score_std = np.std(scores)
    min_score = np.min(scores)
    max_score = np.max(scores)
    elapsed_time = current_time - overall_start_time
    global_steps = self._global_steps_counter.get()
    # Local speed covers only the steps since the last log call;
    # global speed averages over the whole run.
    local_steps_per_sec = steps / (current_time - last_log_time)
    global_steps_per_sec = global_steps / elapsed_time
    global_mil_steps_per_hour = global_steps_per_sec * 3600 / 1000000.0
    log(
        "TRAIN: {}(GlobalSteps), {} episodes, mean: {}, min: {}, max: {}, "
        "\nLocalSpd: {:.0f} STEPS/s GlobalSpd: "
        "{} STEPS/s, {:.2f}M STEPS/hour, total elapsed time: {}".format(
            global_steps,
            len(scores),
            green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
            red("{:0.3f}".format(min_score)),
            blue("{:0.3f}".format(max_score)),
            local_steps_per_sec,
            blue("{:.0f}".format(global_steps_per_sec)),
            global_mil_steps_per_hour,
            sec_to_str(elapsed_time)
        ))
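The helpers used throughout these snippets (log, green, red, blue, sec_to_str) are defined elsewhere in the project. Below is a minimal sketch of plausible stand-ins, assuming log is a plain print wrapper, the color helpers emit ANSI escape codes, and sec_to_str formats a duration as H:MM:SS; only the names come from the source, the bodies are assumptions.

def log(msg):
    # Assumed stand-in: the project's logger may also write to a file.
    print(msg)

def _ansi(code, text):
    # Wrap text in an ANSI color escape sequence.
    return "\033[{}m{}\033[0m".format(code, text)

def green(text):
    return _ansi(32, text)

def red(text):
    return _ansi(31, text)

def blue(text):
    return _ansi(34, text)

def sec_to_str(seconds):
    # Assumed stand-in: format a duration in seconds as H:MM:SS.
    m, s = divmod(int(seconds), 60)
    h, m = divmod(m, 60)
    return "{:d}:{:02d}:{:02d}".format(h, m, s)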
def test(self, episodes_num=None, deterministic=True):
    if episodes_num is None:
        episodes_num = self.test_episodes_per_epoch
    test_start_time = time.time()
    test_rewards = []
    test_actions = []
    test_frameskips = []
    for _ in trange(episodes_num, desc="Testing", file=sys.stdout,
                    leave=False, disable=not self.enable_progress_bar):
        total_reward, actions, frameskips, _ = self.run_episode(
            deterministic=deterministic, return_stats=True)
        test_rewards.append(total_reward)
        test_actions += actions
        test_frameskips += frameskips
        self.doom_wrapper.reset()
        if self.local_network.has_state():
            self.local_network.reset_state()
    test_end_time = time.time()
    test_duration = test_end_time - test_start_time
    min_score = np.min(test_rewards)
    max_score = np.max(test_rewards)
    mean_score = np.mean(test_rewards)
    score_std = np.std(test_rewards)
    log(
        "TEST: mean: {}, min: {}, max: {}, test time: {}".format(
            green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
            red("{:0.3f}".format(min_score)),
            blue("{:0.3f}".format(max_score)),
            sec_to_str(test_duration)))
    return test_rewards, test_actions, test_frameskips
def isReady(self, state):
    # Ready once the current temperature has reached the target temperature.
    if self.cur_temp >= self.set_temp:
        print("Finish " + self.name + " at " + sec_to_str(state['time']))
        return True
    else:
        return False
def print_epoch_log(prefix, scores, steps, epoch_time):
    mean_score = np.mean(scores)
    score_std = np.std(scores)
    min_score = np.min(scores)
    max_score = np.max(scores)
    episodes = len(scores)
    steps_per_sec = steps / epoch_time
    mil_steps_per_hour = steps_per_sec * 3600 / 1000000.0
    log("{}: Episodes: {}, mean: {}, min: {}, max: {}, "
        " Speed: {:.0f} STEPS/s, {:.2f}M STEPS/hour, time: {}".format(
            prefix,
            episodes,
            green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
            red("{:0.3f}".format(min_score)),
            blue("{:0.3f}".format(max_score)),
            steps_per_sec,
            mil_steps_per_hour,
            sec_to_str(epoch_time)))
def check(self, s):
    # Whole sequence already matched: delegate to the final condition.
    if self.seqIdx == len(self.sequence):
        return self.isMet(s)
    if self.seqIdx > 0:
        # Refresh the timestamp while the previously matched step repeats.
        if s['act_truth'] in [ac['act_truth'] for ac in self.sequence[self.seqIdx - 1]]:
            self.last = s['time']
        # Too long since the last matched step: start the sequence over.
        if s['time'] - self.last > self.seqWaitTime:
            self.reset()
            return 0
    # Current event matches the next expected step: advance.
    if s['act_truth'] in [ac['act_truth'] for ac in self.sequence[self.seqIdx]]:
        self.seqIdx += 1
        if self.seqIdx == len(self.sequence):
            print(self.name + ' met at time:{}'.format(sec_to_str(s['time'])))
        self.last = s['time']
    return 0
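In the original project, check is a method of a sequence-condition class. A hypothetical harness follows to show how it might be driven: the class name, constructor, and the stub reset/isMet are illustrative; only the fields check relies on (sequence, seqIdx, last, seqWaitTime, name) and the event keys (act_truth, time) come from the source.

class SeqConditionDemo:
    check = check  # reuse the function above as a method (hypothetical wiring)

    def __init__(self, name, sequence, seqWaitTime):
        self.name = name
        self.sequence = sequence
        self.seqWaitTime = seqWaitTime
        self.seqIdx = 0
        self.last = 0.0

    def reset(self):
        # Start matching the sequence from the beginning.
        self.seqIdx = 0

    def isMet(self, s):
        return 1  # stub: the real post-sequence condition lives elsewhere

cond = SeqConditionDemo('make_cereal',
                        [[{'act_truth': 'open_fridge'}],
                         [{'act_truth': 'pour_milk'}]],
                        seqWaitTime=30.0)
for e in [{'act_truth': 'open_fridge', 'time': 5.0},
          {'act_truth': 'pour_milk', 'time': 9.0}]:
    cond.check(e)  # prints "make_cereal met at time:..." on the final step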
def train(self, session):
    # Prefill replay memory with transitions from a uniformly random policy:
    for _ in trange(self.replay_memory.capacity, desc="Filling replay memory",
                    leave=False, disable=not self.enable_progress_bar, file=sys.stdout):
        if self.doom_wrapper.is_terminal():
            self.doom_wrapper.reset()
        s1 = self.doom_wrapper.get_current_state()
        action_frameskip_index = randint(0, self.actions_num * len(self.frameskips) - 1)
        action_index, frameskip = self.get_action_and_frameskip(action_frameskip_index)
        reward = self.doom_wrapper.make_action(action_index, frameskip)
        terminal = self.doom_wrapper.is_terminal()
        s2 = self.doom_wrapper.get_current_state()
        self.replay_memory.add_transition(s1, action_frameskip_index, s2, reward, terminal)

    overall_start_time = time()
    self.network.update_target_network(session)
    log(green("Starting training.\n"))
    while self._epoch <= self._epochs:
        self.doom_wrapper.reset()
        train_scores = []
        test_scores = []
        train_start_time = time()

        for _ in trange(self.train_steps_per_epoch, desc="Training, epoch {}".format(self._epoch),
                        leave=False, disable=not self.enable_progress_bar, file=sys.stdout):
            self.steps += 1
            s1 = self.doom_wrapper.get_current_state()
            # Epsilon-greedy: explore with a random action, otherwise act greedily.
            if random() <= self.get_current_epsilon():
                action_frameskip_index = randint(0, self.actions_num * len(self.frameskips) - 1)
            else:
                action_frameskip_index = self.network.get_action(session, s1)
            action_index, frameskip = self.get_action_and_frameskip(action_frameskip_index)
            reward = self.doom_wrapper.make_action(action_index, frameskip)
            terminal = self.doom_wrapper.is_terminal()
            s2 = self.doom_wrapper.get_current_state()
            self.replay_memory.add_transition(s1, action_frameskip_index, s2, reward, terminal)

            if self.steps % self.update_pattern[0] == 0:
                for _ in range(self.update_pattern[1]):
                    self.network.train_batch(session, self.replay_memory.get_sample())
            if terminal:
                train_scores.append(self.doom_wrapper.get_total_reward())
                self.doom_wrapper.reset()
            # Periodically sync the frozen target network with the online network.
            if self.steps % self.frozen_steps == 0:
                self.network.update_target_network(session)

        train_time = time() - train_start_time
        log("Epoch {}".format(self._epoch))
        log("Training steps: {}, epsilon: {}".format(self.steps, self.get_current_epsilon()))
        self.print_epoch_log("TRAIN", train_scores, self.train_steps_per_epoch, train_time)

        # Testing:
        test_start_time = time()
        test_steps = 0
        for _ in trange(self.test_episodes_per_epoch, desc="Testing, epoch {}".format(self._epoch),
                        leave=False, disable=not self.enable_progress_bar, file=sys.stdout):
            self.doom_wrapper.reset()
            while not self.doom_wrapper.is_terminal():
                test_steps += 1
                state = self.doom_wrapper.get_current_state()
                action_frameskip_index = self.network.get_action(session, state)
                action_index, frameskip = self.get_action_and_frameskip(action_frameskip_index)
                self.doom_wrapper.make_action(action_index, frameskip)
            test_scores.append(self.doom_wrapper.get_total_reward())
        test_time = time() - test_start_time
        self.print_epoch_log("TEST", test_scores, test_steps, test_time)

        if self.write_summaries:
            log("Writing summaries.")
            train_summary = session.run(self._summaries, {self.scores_placeholder: train_scores})
            self._train_writer.add_summary(train_summary, self.steps)
            if self._run_tests:
                test_summary = session.run(self._summaries, {self.scores_placeholder: test_scores})
                self._test_writer.add_summary(test_summary, self.steps)

        # Save model:
        if self._epoch % self.save_interval == 0:
            savedir = os.path.dirname(self._model_savefile)
            if not os.path.exists(savedir):
                log("Creating directory: {}".format(savedir))
                os.makedirs(savedir)
            log("Saving model to: {}".format(self._model_savefile))
            saver = tf.train.Saver()
            saver.save(session, self._model_savefile)

        overall_time = time() - overall_start_time
        log("Total elapsed time: {}\n".format(sec_to_str(overall_time)))
        self._epoch += 1
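The train() loop above expects a live TensorFlow 1.x session (it calls session.run and tf.train.Saver). A minimal sketch of a plausible entry point follows; the agent class name and its construction are assumptions, only train(session) comes from the source.

import tensorflow as tf

agent = DQNAgent(**settings)  # hypothetical: the class exposing train() above
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    agent.train(session)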