            # QLearning_best['episode'] = episode
            # QLearning_best['learning_rate'] = step_size
            # QLearning_best['best_value'] = episode_reward
        elif agent == sarsaAgent:
            SARSA_stats['episode'].append(episode)
            SARSA_stats['learning_rate'].append(step_size)
            SARSA_stats['best_value'].append(episode_reward)
        else:
            expectedSARSA_stats['episode'].append(episode)
            expectedSARSA_stats['learning_rate'].append(step_size)
            expectedSARSA_stats['best_value'].append(episode_reward)

        # EPSILON DECAY for exploration
        if params.END_EPSILON_DECAYING >= episode >= params.START_EPSILON_DECAYING:
            params.set_epsilon(params.epsilon_decay_value)

        # show some STATS
        ep_rewards.append(episode_reward)  # append the total reward collected in this episode
        # totalReward[type(agent).__name__].append(episode_reward)
        all_reward_sums[(agent, step_size)].append(episode_reward)

        if not episode % params.STATS_EVERY:
            average_reward = sum(ep_rewards[-params.STATS_EVERY:]) / len(ep_rewards[-params.STATS_EVERY:])
            aggr_ep_rewards['ep'].append(episode)
            aggr_ep_rewards['avg'].append(average_reward)
            aggr_ep_rewards['max'].append(max(ep_rewards[-params.STATS_EVERY:]))
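        # --- Illustrative sketch (not part of the original script): one plausible way the
        # --- `params` object used above could manage linear epsilon decay between
        # --- START_EPSILON_DECAYING and END_EPSILON_DECAYING. The attribute names mirror
        # --- those referenced above; the class body and its defaults are assumptions.
        #
        # class Params:
        #     def __init__(self, episodes=25_000, stats_every=100):
        #         self.EPSILON = 1.0                       # start fully exploratory
        #         self.START_EPSILON_DECAYING = 1
        #         self.END_EPSILON_DECAYING = episodes // 2
        #         # amount subtracted from epsilon on every decaying episode
        #         self.epsilon_decay_value = self.EPSILON / (
        #             self.END_EPSILON_DECAYING - self.START_EPSILON_DECAYING)
        #         self.STATS_EVERY = stats_every
        #
        #     def set_epsilon(self, decay_value):
        #         # decrease epsilon, never letting it go below zero
        #         self.EPSILON = max(self.EPSILON - decay_value, 0.0)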
            SARSA_stats['episode'].append(episode)
            SARSA_stats['learning_rate'].append(step_size)
            SARSA_stats['best_value'].append(episode_reward)
            SARSA_stats['discount'].append(discount)
            SARSA_stats['epsilondecay'].append(epsilondecay)
        else:
            expectedSARSA_stats['episode'].append(episode)
            expectedSARSA_stats['learning_rate'].append(step_size)
            expectedSARSA_stats['best_value'].append(episode_reward)
            expectedSARSA_stats['discount'].append(discount)
            expectedSARSA_stats['epsilondecay'].append(epsilondecay)

        # EPSILON DECAY for exploration
        if params.END_EPSILON_DECAYING >= episode >= params.START_EPSILON_DECAYING:
            params.set_epsilon(epsilondecay)

        # show some STATS
        ep_rewards.append(episode_reward)  # append the total reward collected in this episode
        # totalReward[type(agent).__name__].append(episode_reward)
        # all_reward_sums[(agent, step_size)].append(episode_reward)

        if not episode % params.STATS_EVERY:
            average_reward = sum(ep_rewards[-params.STATS_EVERY:]) / len(ep_rewards[-params.STATS_EVERY:])
            aggr_ep_rewards['ep'].append(episode)
            aggr_ep_rewards['avg'].append(average_reward)
            aggr_ep_rewards['max'].append(max(ep_rewards[-params.STATS_EVERY:]))
            aggr_ep_rewards['min'].append(min(ep_rewards[-params.STATS_EVERY:]))
            # print(f'Agent: {type(agent).__name__}, Learning Rate: {params.LEARNING_RATE}, '
            #       f'discount: {params.DISCOUNT:>2.2f}, episode: {episode:>5d}, '
            #       f'average reward: {average_reward:>4.1f}, '
            #       f'min reward: {min(ep_rewards[-params.STATS_EVERY:])}, '
            #       f'max reward: {max(ep_rewards[-params.STATS_EVERY:])}, '
            #       f'current epsilon: {params.EPSILON:>1.3f}, '
            #       f'epsilon_decay_value: {params.epsilon_decay_value:>1.5f}')
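        # --- Illustrative sketch (assumption, not the original script): after the training
        # --- loop finishes, the windowed statistics collected in `aggr_ep_rewards` can be
        # --- compared by plotting average / min / max reward per STATS_EVERY-episode window,
        # --- e.g. with matplotlib:
        #
        # import matplotlib.pyplot as plt
        #
        # plt.plot(aggr_ep_rewards['ep'], aggr_ep_rewards['avg'], label='average reward')
        # plt.plot(aggr_ep_rewards['ep'], aggr_ep_rewards['min'], label='min reward')
        # plt.plot(aggr_ep_rewards['ep'], aggr_ep_rewards['max'], label='max reward')
        # plt.xlabel('episode')
        # plt.ylabel('reward')
        # plt.legend(loc='lower right')
        # plt.show()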