                    #QLearning_best['episode'] = episode
                    #QLearning_best['learning_rate'] = step_size
                    #QLearning_best['best_value'] = episode_reward

                elif agent == sarsaAgent:
                    SARSA_stats['episode'].append(episode)
                    SARSA_stats['learning_rate'].append(step_size)
                    SARSA_stats['best_value'].append(episode_reward)
                else:
                    expectedSARSA_stats['episode'].append(episode)
                    expectedSARSA_stats['learning_rate'].append(step_size)
                    expectedSARSA_stats['best_value'].append(episode_reward)

            # Decay epsilon while the episode lies inside the exploration-decay window
            if params.END_EPSILON_DECAYING >= episode >= params.START_EPSILON_DECAYING:
                params.set_epsilon(params.epsilon_decay_value)
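            # The decay above relies on params.set_epsilon and params.epsilon_decay_value,
            # which are defined elsewhere.  A minimal sketch (an assumption, not the actual
            # Params implementation) of a linear schedule over the decaying window:
            #
            #   epsilon_decay_value = EPSILON / (END_EPSILON_DECAYING - START_EPSILON_DECAYING)
            #
            #   def set_epsilon(self, decay_value):
            #       # subtract one decay step, never letting epsilon go below zero
            #       self.EPSILON = max(self.EPSILON - decay_value, 0.0)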

            # Track per-episode rewards for the rolling statistics below
            ep_rewards.append(episode_reward)

            # Record the episode's total reward for this (agent, step_size) combination
            #totalReward[type(agent).__name__].append(episode_reward)
            all_reward_sums[(agent, step_size)].append(episode_reward)

            if not episode % params.STATS_EVERY:
                # Aggregate the last STATS_EVERY episode rewards
                recent_rewards = ep_rewards[-params.STATS_EVERY:]
                average_reward = sum(recent_rewards) / len(recent_rewards)
                aggr_ep_rewards['ep'].append(episode)
                aggr_ep_rewards['avg'].append(average_reward)
                aggr_ep_rewards['max'].append(max(recent_rewards))
                aggr_ep_rewards['min'].append(min(recent_rewards))
                print(f'Agent: {type(agent).__name__}, learning rate: {step_size}, '
                      f'discount: {params.DISCOUNT:>2.2f}, episode: {episode:>5d}, '
                      f'average reward: {average_reward:>4.1f}, '
                      f'min reward: {min(recent_rewards)}, max reward: {max(recent_rewards)}, '
                      f'current epsilon: {params.EPSILON:>1.3f}')
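            # For reference, the containers filled in above are assumed to be initialised
            # before the training loops roughly as follows (a sketch, not the original
            # declarations):
            #
            #   from collections import defaultdict
            #
            #   SARSA_stats = {'episode': [], 'learning_rate': [], 'best_value': []}
            #   expectedSARSA_stats = {'episode': [], 'learning_rate': [], 'best_value': []}
            #   aggr_ep_rewards = {'ep': [], 'avg': [], 'max': [], 'min': []}
            #   all_reward_sums = defaultdict(list)   # keyed by (agent, step_size)
            #   ep_rewards = []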