Esempio n. 1
0
        # End-of-episode bookkeeping: total the entries of
        # one_episode_cartpole_reward_list (presumably one reward per time
        # step -- confirm where the list is filled) and record the episode's
        # step count and total reward in the raw-data lists.
        one_episode_reward_sum = np.sum(one_episode_cartpole_reward_list)
        game_raw_data.episode_total_step_list.append(time_step)
        game_raw_data.episode_total_reward_list.append(one_episode_reward_sum)

        # Disabled earlier progress printout, kept for reference only.
        # if (episode_idx+1) % data_analysis_interval == 0:
        #     epi_range_str = str(episode_idx+1) + " to " + str(episode_idx + data_analysis_interval + 1)
        #     total_reward = np.round(np.sum(one_episode_cartpole_reward_list), decimals = 3)
        #     average_reward = np.round(np.mean(game_raw_data.episode_total_reward_list[-data_analysis_interval:]), decimals = 3)
        #     average_time_step = np.round(np.mean(game_raw_data.episode_total_step_list[-data_analysis_interval:]), decimals = 3)
        #     print("episode: {}, avg time step: {}, total reward: {}, average_reward: {}, length: {}\n".format(epi_range_str, average_time_step, total_reward, average_reward, time_step))

        # data analysis section
        # Runs once every `data_analysis_interval` episodes (the `+ 1` suggests
        # episode_idx is 0-based -- confirm against the enclosing loop).
        if (episode_idx + 1) % data_analysis_interval == 0:
            # Step counts of the most recent `data_analysis_interval` episodes.
            interval_time_step_data_list: list = game_raw_data.episode_total_step_list[
                -data_analysis_interval:]
            interval_time_step_mean: float = StatisticUtil.derive_mean(
                interval_time_step_data_list)
            # The already-computed mean is passed to the standard-deviation helper.
            interval_time_step_stddev: float = StatisticUtil.derive_standard_deviation(
                interval_time_step_data_list, interval_time_step_mean)

            # Store the interval's time-step statistics on the analysis object.
            anay_data.time_step_mean_list.append(interval_time_step_mean)
            anay_data.time_step_std_dev_list.append(interval_time_step_stddev)
            # anay_data.name_datalist_dict["mean"].append(interval_time_step_mean)
            # anay_data.name_datalist_dict["standard_deviation"].append(interval_time_step_stddev)

            # Same statistics for the total rewards of the same episodes.
            # NOTE(review): the code that consumes interval_reward_mean and
            # interval_reward_stddev is not visible in this excerpt.
            interval_reward_data_list: list = game_raw_data.episode_total_reward_list[
                -data_analysis_interval:]
            interval_reward_mean: float = StatisticUtil.derive_mean(
                interval_reward_data_list)
            interval_reward_stddev: float = StatisticUtil.derive_standard_deviation(
                interval_reward_data_list, interval_reward_mean)
                deep_q_agent.update_target_policy_network()

        # Record how many time steps this episode took.
        game_raw_data.episode_total_step_list.append(time_step)

        # data analysis section
        # Runs once every `data_analysis_interval` episodes (the `+ 1` suggests
        # episode_idx is 0-based -- confirm against the enclosing loop).
        if (episode_idx + 1) % data_analysis_interval == 0:
            # The interval is reported as ending at episode number
            # `episode_idx + 1`, so it begins `data_analysis_interval - 1`
            # episodes earlier.
            start_interval: int = episode_idx - data_analysis_interval + 2
            # Both bounds are formatted using the digit count of
            # `episodes_to_run` (presumably so the printed ranges line up --
            # verify against CommonUtil.format_int_to_str_length).
            start_interval_str: str = CommonUtil.format_int_to_str_length(
                start_interval, len(str(episodes_to_run)))
            end_interval_str: str = CommonUtil.format_int_to_str_length(
                episode_idx + 1, len(str(episodes_to_run)))

            # Step counts of the most recent `data_analysis_interval` episodes.
            interval_data_list: list = game_raw_data.episode_total_step_list[
                -data_analysis_interval:]

            interval_mean: float = StatisticUtil.derive_mean(
                interval_data_list)
            # The already-computed mean is passed to the standard-deviation helper.
            interval_stddev: float = StatisticUtil.derive_standard_deviation(
                interval_data_list, interval_mean)
            # Rounded copy is used only in the printed message below.
            interval_stddev_to_hundredths: float = CommonUtil.round_to_hundredths(
                interval_stddev)

            # The unrounded values are what get stored for later analysis.
            anay_data.name_datalist_dict["mean"].append(interval_mean)
            anay_data.name_datalist_dict["standard_deviation"].append(
                interval_stddev)


            # Human-readable summary; the mean is shown unrounded while the
            # standard deviation uses the rounded value.
            msg = "Evaluation of episode interval: " + start_interval_str + " to " + end_interval_str + \
                  "\t\tMean(time_step): " + str(interval_mean) + \
                  "\t\tStandard Deviation(time_step): " + str(interval_stddev_to_hundredths)
            # Echo to stdout and also keep the message on the analysis object.
            print(msg)
            anay_data.msg_list.append(msg)