def _print_train_log(self, scores, overall_start_time, last_log_time, steps):
        current_time = time.time()
        mean_score = np.mean(scores)
        score_std = np.std(scores)
        min_score = np.min(scores)
        max_score = np.max(scores)

        elapsed_time = time.time() - overall_start_time
        global_steps = self._global_steps_counter.get()
        local_steps_per_sec = steps / (current_time - last_log_time)
        global_steps_per_sec = global_steps / elapsed_time
        global_mil_steps_per_hour = global_steps_per_sec * 3600 / 1000000.0
        log(
            "TRAIN: {}(GlobalSteps), {} episodes, mean: {}, min: {}, max: {}, "
            "\nLocalSpd: {:.0f} STEPS/s GlobalSpd: "
            "{} STEPS/s, {:.2f}M STEPS/hour, total elapsed time: {}".format(
                global_steps,
                len(scores),
                green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
                red("{:0.3f}".format(min_score)),
                blue("{:0.3f}".format(max_score)),
                local_steps_per_sec,
                blue("{:.0f}".format(
                    global_steps_per_sec)),
                global_mil_steps_per_hour,
                sec_to_str(elapsed_time)
            ))
Beispiel #2
0
def measure(name, iters=5000, **settings):
    print(name)
    for k, v in settings.items():
        print("\t{}: {}".format(k, v))

    # Vizdoom wrapper
    doom_wrapper = VizdoomWrapper(**settings)
    start = time()
    for _ in trange(iters, leave=False):
        current_img, current_misc = doom_wrapper.get_current_state()
        action_index = randint(0, doom_wrapper.actions_num - 1)
        doom_wrapper.make_action(action_index)

        if doom_wrapper.is_terminal():
            doom_wrapper.reset()
    end = time()
    wrapper_t = (end - start)

    # Vanilla vizdoom:
    doom = vzd.DoomGame()
    if "scenarios_path" not in settings:
        scenarios_path = vzd.__path__[0] + "/scenarios"
    else:
        scenarios_path = settings["scenarios_path"]
    config_file = scenarios_path + "/" + settings["config_file"]
    doom.load_config(config_file)
    doom.set_window_visible(False)
    doom.set_screen_format(vzd.ScreenFormat.GRAY8)
    doom.set_screen_resolution(vzd.ScreenResolution.RES_160X120)
    doom.init()
    actions = [
        list(a)
        for a in it.product([0, 1],
                            repeat=len(doom.get_available_game_variables()))
    ]
    start = time()
    frame_skip = settings["frame_skip"]
    for _ in trange(iters, leave=False):
        if doom.is_episode_finished():
            doom.new_episode()
        doom.make_action(choice(actions), frame_skip)

    end = time()
    vanilla_t = end - start
    print(green("\twrapper: {:0.2f} steps/s".format(iters / wrapper_t)))
    print(
        green("\twrapper: {:0.2f} s/1000 steps".format(wrapper_t / iters *
                                                       1000)))
    print(blue("\tvanilla: {:0.2f} steps/s".format(iters / vanilla_t)))
    print(
        blue("\tvanilla: {:0.2f} s/1000 steps\n".format(vanilla_t / iters *
                                                        1000)))
    def test(self, episodes_num=None, deterministic=True):
        if episodes_num is None:
            episodes_num = self.test_episodes_per_epoch

        test_start_time = time.time()
        test_rewards = []
        test_actions = []
        test_frameskips = []
        for _ in trange(episodes_num, desc="Testing", file=sys.stdout,
                        leave=False, disable=not self.enable_progress_bar):
            total_reward, actions, frameskips, _ = self.run_episode(deterministic=deterministic, return_stats=True)
            test_rewards.append(total_reward)
            test_actions += actions
            test_frameskips += frameskips

        self.doom_wrapper.reset()
        if self.local_network.has_state():
            self.local_network.reset_state()

        test_end_time = time.time()
        test_duration = test_end_time - test_start_time
        min_score = np.min(test_rewards)
        max_score = np.max(test_rewards)
        mean_score = np.mean(test_rewards)
        score_std = np.std(test_rewards)
        log(
            "TEST: mean: {}, min: {}, max: {}, test time: {}".format(
                green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
                red("{:0.3f}".format(min_score)),
                blue("{:0.3f}".format(max_score)),
                sec_to_str(test_duration)))
        return test_rewards, test_actions, test_frameskips
Beispiel #4
0
    def print_epoch_log(prefix, scores, steps, epoch_time):
        mean_score = np.mean(scores)
        score_std = np.std(scores)
        min_score = np.min(scores)
        max_score = np.max(scores)
        episodes = len(scores)

        steps_per_sec = steps / epoch_time
        mil_steps_per_hour = steps_per_sec * 3600 / 1000000.0
        log("{}: Episodes: {}, mean: {}, min: {}, max: {}, "
            " Speed: {:.0f} STEPS/s, {:.2f}M STEPS/hour, time: {}".format(
                prefix, episodes,
                green("{:0.3f}±{:0.2f}".format(mean_score, score_std)),
                red("{:0.3f}".format(min_score)),
                blue("{:0.3f}".format(max_score)), steps_per_sec,
                mil_steps_per_hour, sec_to_str(epoch_time)))