# Per-episode bookkeeping: store the episode's `time_step` value and the sum
# of the rewards collected in `one_episode_cartpole_reward_list`.
one_episode_reward_sum = np.sum(one_episode_cartpole_reward_list)
game_raw_data.episode_total_step_list.append(time_step)
game_raw_data.episode_total_reward_list.append(one_episode_reward_sum)

# Data analysis section: runs once every `data_analysis_interval` episodes.
if (episode_idx + 1) % data_analysis_interval == 0:
    # Selects the trailing `data_analysis_interval` entries of a history list
    # (equivalent to `[-data_analysis_interval:]`).
    latest_interval = slice(-data_analysis_interval, None)

    # Time-step statistics over the latest interval; the mean is computed
    # first because the standard-deviation helper takes it as an argument.
    interval_time_step_data_list: list = (
        game_raw_data.episode_total_step_list[latest_interval]
    )
    interval_time_step_mean: float = StatisticUtil.derive_mean(
        interval_time_step_data_list
    )
    interval_time_step_stddev: float = StatisticUtil.derive_standard_deviation(
        interval_time_step_data_list,
        interval_time_step_mean,
    )
    anay_data.time_step_mean_list.append(interval_time_step_mean)
    anay_data.time_step_std_dev_list.append(interval_time_step_stddev)

    # Reward statistics over the same interval, derived the same way.
    interval_reward_data_list: list = (
        game_raw_data.episode_total_reward_list[latest_interval]
    )
    interval_reward_mean: float = StatisticUtil.derive_mean(
        interval_reward_data_list
    )
    interval_reward_stddev: float = StatisticUtil.derive_standard_deviation(
        interval_reward_data_list,
        interval_reward_mean,
    )
# Sync the agent's target policy network, then store this episode's
# `time_step` value in the raw-data history.
deep_q_agent.update_target_policy_network()
game_raw_data.episode_total_step_list.append(time_step)

# Data analysis section: runs once every `data_analysis_interval` episodes.
if (episode_idx + 1) % data_analysis_interval == 0:
    # Both interval bounds are formatted against the digit count of
    # `episodes_to_run` (presumably so the labels line up -- confirm against
    # CommonUtil.format_int_to_str_length).
    label_width: int = len(str(episodes_to_run))

    # The interval ends at `episode_idx + 1` and spans
    # `data_analysis_interval` episodes, hence the `+ 2` on the start bound.
    start_interval: int = episode_idx - data_analysis_interval + 2
    start_interval_str: str = CommonUtil.format_int_to_str_length(
        start_interval, label_width
    )
    end_interval_str: str = CommonUtil.format_int_to_str_length(
        episode_idx + 1, label_width
    )

    # Mean and standard deviation of the time-step values in the interval.
    interval_data_list: list = game_raw_data.episode_total_step_list[
        -data_analysis_interval:
    ]
    interval_mean: float = StatisticUtil.derive_mean(interval_data_list)
    interval_stddev: float = StatisticUtil.derive_standard_deviation(
        interval_data_list, interval_mean
    )
    interval_stddev_to_hundredths: float = CommonUtil.round_to_hundredths(
        interval_stddev
    )

    # The unrounded values are stored; only the printed standard deviation
    # goes through round_to_hundredths.
    anay_data.name_datalist_dict["mean"].append(interval_mean)
    anay_data.name_datalist_dict["standard_deviation"].append(interval_stddev)

    # `!s` forces str() on the numbers, matching explicit str(...) calls.
    msg = (
        f"Evaluation of episode interval: {start_interval_str} to {end_interval_str}"
        f"\t\tMean(time_step): {interval_mean!s}"
        f"\t\tStandard Deviation(time_step): {interval_stddev_to_hundredths!s}"
    )
    print(msg)
    anay_data.msg_list.append(msg)