(action, )] + params.LEARNING_RATE * ( reward + params.DISCOUNT * expected_q) elif new_state[0] >= params.env.goal_position: print(f"finish in episode {episode}") print( f'Episode: {episode:>5d}, average reward: {average_reward:>4.1f}, current epsilon: {params.EPSILON:>1.2f}, discount: {params.DISCOUNT:>2.2f},Learning Rate: {params.LEARNING_RATE}' ) table[discrete_state + (action, )] = params.REWARD_END discrete_state = new_discrete_state episode_reward += reward # Get the BEST Value in all episodes if params.get_bestValue() < episode_reward: params.set_bestValue(episode_reward) params.set_bestEpisode(episode) params.set_bestLEARNING_RATE(step_size) #params.set_bestDISCOUNT(discount) best_stepsize[(agent, step_size)].append(episode_reward) if agent == qLearningAgent: QLearning_stats['episode'].append(episode) QLearning_stats['learning_rate'].append(step_size) QLearning_stats['best_value'].append(episode_reward) #QLearning_best['episode'] = episode #QLearning_best['learning_rate'] = episode #QLearning_best['best_value'] = episode elif agent == sarsaAgent: SARSA_stats['episode'].append(episode)
(action, )] + params.LEARNING_RATE * ( reward + params.DISCOUNT * expected_q) elif new_state[0] >= params.env.goal_position: print(f"finish in episode {episode}") print( f'Episode: {episode:>5d}, average reward: {average_reward:>4.1f}, current epsilon: {params.EPSILON:>1.2f}, discount: {params.DISCOUNT:>2.2f},Learning Rate: {params.LEARNING_RATE}' ) table[discrete_state + (action, )] = params.REWARD_END discrete_state = new_discrete_state episode_reward += reward # Get the BEST Value in all episodes if params.get_bestValue() < episode_reward: params.set_bestValue(episode_reward) params.set_bestEpisode(episode) # EPSILON DECAY for exploratory if params.END_EPSILON_DECAYING >= episode >= params.START_EPSILON_DECAYING: params.set_epsilon(params.epsilon_decay_value) # show some STATS ep_rewards.append(episode_reward) # Append the sum of reward at the end of the episode totalReward[type(agent).__name__].append(episode_reward) if not episode % params.STATS_EVERY: