def _print_train_log(self, scores, overall_start_time, last_log_time, steps):
        """Log training score statistics and throughput since the last log.

        Args:
            scores: Episode scores gathered since the previous log; may be
                empty, in which case every statistic is reported as nan.
            overall_start_time: ``time.time()`` timestamp of the training start.
            last_log_time: ``time.time()`` timestamp of the previous log.
            steps: Number of local steps made since ``last_log_time``.
        """
        # One timestamp for everything, so both speeds refer to the same instant.
        current_time = time.time()

        # len() rather than truthiness: scores may be a numpy array.
        if len(scores) > 0:
            mean_score = np.mean(scores)
            score_std = np.std(scores)
            min_score = np.min(scores)
            max_score = np.max(scores)
        else:
            # np.min/np.max raise ValueError on empty input (np.mean/np.std
            # only yield nan), which would kill the calling thread just
            # because no episode finished in this period.
            mean_score = score_std = min_score = max_score = float("nan")

        elapsed_time = current_time - overall_start_time
        local_interval = current_time - last_log_time
        global_steps = self._global_steps_counter.get()

        # A zero-length interval (coarse clock) makes the speed undefined;
        # report nan instead of raising ZeroDivisionError.
        undefined = float("nan")
        local_steps_per_sec = steps / local_interval if local_interval > 0 else undefined
        global_steps_per_sec = global_steps / elapsed_time if elapsed_time > 0 else undefined
        global_mil_steps_per_hour = global_steps_per_sec * 3600 / 1000000.0
        log(
            "TRAIN: {}(GlobalSteps), {} episodes, mean: {}, min: {}, max: {}, "
            "\nLocalSpd: {:.0f} STEPS/s GlobalSpd: "
            "{} STEPS/s, {:.2f}M STEPS/hour, total elapsed time: {}".format(
                global_steps,
                len(scores),
                green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
                red("{:0.3f}".format(min_score)),
                blue("{:0.3f}".format(max_score)),
                local_steps_per_sec,
                blue("{:.0f}".format(
                    global_steps_per_sec)),
                global_mil_steps_per_hour,
                sec_to_str(elapsed_time)
            ))
    def test(self, episodes_num=None, deterministic=True):
        """Play test episodes, log a reward summary and return the collected data.

        Args:
            episodes_num: Number of episodes to play. When None,
                ``self.test_episodes_per_epoch`` is used.
            deterministic: Forwarded to ``self.run_episode``.

        Returns:
            A tuple ``(rewards, actions, frameskips)``: the total reward of
            each episode, and the actions and frameskips of all episodes
            concatenated into single lists.
        """
        if episodes_num is None:
            episodes_num = self.test_episodes_per_epoch

        started_at = time.time()
        rewards, all_actions, all_frameskips = [], [], []

        progress = trange(episodes_num, desc="Testing", file=sys.stdout,
                          leave=False, disable=not self.enable_progress_bar)
        for _ in progress:
            # The fourth element of the episode stats is not used here.
            episode_reward, episode_actions, episode_frameskips, _unused = self.run_episode(
                deterministic=deterministic, return_stats=True)
            rewards.append(episode_reward)
            all_actions.extend(episode_actions)
            all_frameskips.extend(episode_frameskips)

        # Reset the environment (and the network state, if the network has one)
        # once all test episodes are done.
        self.doom_wrapper.reset()
        if self.local_network.has_state():
            self.local_network.reset_state()

        duration = time.time() - started_at

        mean_and_std = "{:0.3f}±{:0.2f}".format(np.mean(rewards), np.std(rewards))
        lowest = "{:0.3f}".format(np.min(rewards))
        highest = "{:0.3f}".format(np.max(rewards))
        log(
            "TEST: mean: {}, min: {}, max: {}, test time: {}".format(
                green(mean_and_std),
                red(lowest),
                blue(highest),
                sec_to_str(duration)))
        return rewards, all_actions, all_frameskips
    def run(self):
        """Run the training loop, periodically running the global network's unfreeze op.

        Trains until ``self._epoch`` exceeds ``self._epochs``. After every
        training step the global step counter is increased; when
        ``self.unfreeze_thread`` is set and the counter reaches the next
        multiple of ``self.frozen_global_steps``, the ``unfreeze`` op of the
        global network is run. At the end of each epoch the thread with index 0
        logs training statistics, optionally runs tests, writes summaries and
        saves the model. ``SignalException`` and
        ``ViZDoomUnexpectedExitException`` end the loop with a message instead
        of propagating.
        """
        # TODO this method is ugly, make it nicer ...and it's the same as above.... really TODO!!
        # Basically code copied from base class with unfreezing
        try:
            overall_start_time = time.time()
            last_log_time = overall_start_time
            local_steps_for_log = 0
            # Global step count at which the next unfreeze is due.
            next_target_update = self.frozen_global_steps
            while self._epoch <= self._epochs:
                steps = self.make_training_step()
                local_steps_for_log += steps
                global_steps = self._global_steps_counter.inc(steps)

                # Updating target network:
                if self.unfreeze_thread:
                    # TODO this check is dangerous
                    if global_steps >= next_target_update:
                        next_target_update += self.frozen_global_steps
                        # The threshold advances by one interval only, so it may
                        # still be behind the counter; warn about the lag.
                        if next_target_update <= global_steps:
                            # TODO use warn from the logger
                            logging.warning(yellow("Global steps ({}) >= next target update ({}).".format(
                                global_steps, next_target_update)))

                        self._session.run(self.global_network.ops.unfreeze)
                # Logs & tests
                # The epoch ends once self.local_steps reaches
                # local_steps_per_epoch * current epoch number.
                if self.local_steps_per_epoch * self._epoch <= self.local_steps:
                    self._epoch += 1

                    # Only the thread with index 0 logs, tests, writes summaries and saves.
                    if self.thread_index == 0:
                        self._print_train_log(self.train_scores, overall_start_time, last_log_time, local_steps_for_log)

                        if self._run_tests:
                            # Only the scores are used below; actions and frameskips are discarded.
                            test_scores, _, _ = self.test(deterministic=self.deterministic_testing)

                        if self.write_summaries:
                            train_summary = self._session.run(self._summaries,
                                                              {self.scores_placeholder: self.train_scores})
                            self._train_writer.add_summary(train_summary, global_steps)
                            if self._run_tests:
                                test_summary = self._session.run(self._summaries,
                                                                 {self.scores_placeholder: test_scores})
                                self._test_writer.add_summary(test_summary, global_steps)

                        # Restart the speed measurement for the next log.
                        last_log_time = time.time()
                        local_steps_for_log = 0

                        log("Learning rate: {}".format(self._session.run(self.learning_rate)))

                        # Saves model
                        if self._epoch % self.save_interval == 0:
                            self.save_model()
                        log("")
                    # Per-epoch training statistics start from scratch in every thread.
                    self.train_scores = []
                    self.train_actions = []
                    self.train_frameskips = []

        except (SignalException, ViZDoomUnexpectedExitException):
            threadsafe_print(red("Thread #{} aborting(ViZDoom killed).".format(self.thread_index)))
    def run(self):
        """Run the training loop until ``self._epoch`` exceeds ``self._epochs``.

        Every iteration makes one training step and adds its step count to the
        global step counter. At the end of each epoch the thread with index 0
        logs training statistics, optionally runs tests, writes summaries,
        logs the learning rate and saves the model every
        ``self.save_interval`` epochs. ``SignalException`` and
        ``ViZDoomUnexpectedExitException`` end the loop with a message instead
        of propagating.
        """
        # TODO this method is ugly, make it nicer
        try:
            training_started = time.time()
            previous_log = training_started
            steps_since_log = 0
            while self._epoch <= self._epochs:
                made_steps = self.make_training_step()
                steps_since_log += made_steps
                total_steps = self._global_steps_counter.inc(made_steps)

                # The epoch ends once self.local_steps reaches
                # local_steps_per_epoch * current epoch number.
                epoch_done = self.local_steps_per_epoch * self._epoch <= self.local_steps
                if not epoch_done:
                    continue
                self._epoch += 1

                # Logs & tests: done by the thread with index 0 only.
                if self.thread_index == 0:
                    self._print_train_log(
                        self.train_scores, training_started, previous_log, steps_since_log)

                    if self._run_tests:
                        test_scores = self.test(deterministic=self.deterministic_testing)

                    if self.write_summaries:
                        train_feed = {self.scores_placeholder: self.train_scores}
                        train_summary = self._session.run(self._summaries, train_feed)
                        self._train_writer.add_summary(train_summary, total_steps)
                        if self._run_tests:
                            test_feed = {self.scores_placeholder: test_scores}
                            test_summary = self._session.run(self._summaries, test_feed)
                            self._test_writer.add_summary(test_summary, total_steps)

                    # Restart the speed measurement for the next log.
                    previous_log = time.time()
                    steps_since_log = 0

                    current_learning_rate = self._session.run(self.learning_rate)
                    log("Learning rate: {}".format(current_learning_rate))

                    # Saves model
                    if self._epoch % self.save_interval == 0:
                        self.save_model()
                    log("")

                # Scores are collected per epoch, in every thread.
                self.train_scores = []

        except (SignalException, ViZDoomUnexpectedExitException):
            abort_message = "Thread #{} aborting(ViZDoom killed).".format(self.thread_index)
            threadsafe_print(red(abort_message))
Example #5
0
    def print_epoch_log(prefix, scores, steps, epoch_time):
        """Log score statistics and throughput of a single epoch.

        Args:
            prefix: Label put at the beginning of the log line.
            scores: Episode scores from the epoch; may be empty, in which case
                every statistic is reported as nan.
            steps: Number of steps made during the epoch.
            epoch_time: Duration of the epoch in seconds.
        """
        # len() rather than truthiness: scores may be a numpy array.
        if len(scores) > 0:
            mean_score = np.mean(scores)
            score_std = np.std(scores)
            min_score = np.min(scores)
            max_score = np.max(scores)
        else:
            # np.min/np.max raise ValueError on empty input (np.mean/np.std
            # only yield nan); an epoch without finished episodes should
            # still produce a log line rather than an exception.
            mean_score = score_std = min_score = max_score = float("nan")
        episodes = len(scores)

        # A zero-length epoch makes the speed undefined; report nan instead
        # of raising ZeroDivisionError.
        steps_per_sec = steps / epoch_time if epoch_time > 0 else float("nan")
        mil_steps_per_hour = steps_per_sec * 3600 / 1000000.0
        log("{}: Episodes: {}, mean: {}, min: {}, max: {}, "
            " Speed: {:.0f} STEPS/s, {:.2f}M STEPS/hour, time: {}".format(
                prefix, episodes,
                green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
                red("{:0.3f}".format(min_score)),
                blue("{:0.3f}".format(max_score)), steps_per_sec,
                mil_steps_per_hour, sec_to_str(epoch_time)))
    def run(self):
        """Run the training loop until ``self._epoch`` exceeds ``self._epochs``.

        Every iteration makes one training step and adds its step count to the
        global step counter. At the end of each epoch the thread with index 0
        logs training statistics, runs tests (when enabled and scheduled by
        ``self.test_interval``), writes summaries, logs the learning rate and
        the current time, saves the model every ``self.save_interval`` epochs
        and optionally logs action/frameskip heatmaps.
        ``SignalException`` and ``ViZDoomUnexpectedExitException`` end the loop
        with a message instead of propagating.
        """
        # TODO this method is ugly, make it nicer
        try:
            overall_start_time = time.time()
            last_log_time = overall_start_time
            local_steps_for_log = 0
            while self._epoch <= self._epochs:
                steps = self.make_training_step()
                local_steps_for_log += steps
                global_steps = self._global_steps_counter.inc(steps)
                # Logs & tests
                # The epoch ends once self.local_steps reaches
                # local_steps_per_epoch * current epoch number.
                if self.local_steps_per_epoch * self._epoch <= self.local_steps:
                    self._epoch += 1

                    # Only the thread with index 0 logs, tests, writes summaries and saves.
                    if self.thread_index == 0:
                        # self._epoch was already incremented, hence the "- 1".
                        log("EPOCH {}".format(self._epoch - 1))
                        self._print_train_log(
                            self.train_scores, overall_start_time, last_log_time, local_steps_for_log)
                        scheduled_for_test = ((self._epoch - 1) % self.test_interval) == 0
                        # Single flag for "tests actually ran this epoch". Every
                        # use of the test results below is guarded by it, so the
                        # results are never read when tests are disabled (they
                        # would be unbound and raise UnboundLocalError).
                        run_test_this_epoch = self._run_tests and scheduled_for_test
                        if run_test_this_epoch:
                            test_scores, test_actions, test_frameskips = self.test(
                                deterministic=self.deterministic_testing)

                        if self.write_summaries:
                            train_summary = self._session.run(self._summaries,
                                                              {self.scores_placeholder: self.train_scores,
                                                               self.actions_placeholder: self.train_actions,
                                                               self.frameskips_placeholder: self.train_frameskips})
                            self._train_writer.add_summary(train_summary, global_steps)
                            if run_test_this_epoch:
                                test_summary = self._session.run(self._summaries,
                                                                 {self.scores_placeholder: test_scores,
                                                                  self.actions_placeholder: test_actions,
                                                                  self.frameskips_placeholder: test_frameskips})
                                self._test_writer.add_summary(test_summary, global_steps)

                        # Restart the speed measurement for the next log.
                        last_log_time = time.time()
                        local_steps_for_log = 0
                        log("Learning rate: {}".format(self._session.run(self.learning_rate)))

                        # Saves model
                        if self._epoch % self.save_interval == 0:
                            self.save_model()
                        now = datetime.datetime.now()
                        log("Time: {:2d}:{:02d}".format(now.hour, now.minute))

                        if self.show_heatmaps:
                            log("Train heatmaps:")
                            log(self.heatmap(self.train_actions, self.train_frameskips))
                            log("")
                            if run_test_this_epoch:
                                log("Test heatmaps:")
                                log(self.heatmap(test_actions, test_frameskips))
                        log("")
                    # Per-epoch training statistics start from scratch in every thread.
                    self.train_scores = []
                    self.train_actions = []
                    self.train_frameskips = []

            threadsafe_print("Thread {} finished.".format(self.thread_index))
        except (SignalException, ViZDoomUnexpectedExitException):
            threadsafe_print(red("Thread #{} aborting(ViZDoom killed).".format(self.thread_index)))