                    # Assumed standard TD form (head was truncated): Q <- (1-lr)*Q + lr*(r + gamma*expected_q)
                    table[discrete_state + (action, )] = (
                        (1 - params.LEARNING_RATE) * table[discrete_state + (action, )]
                        + params.LEARNING_RATE * (reward + params.DISCOUNT * expected_q))

                elif new_state[0] >= params.env.goal_position:
                    print(f"finish in episode {episode}")
                    print(
                        f'Episode: {episode:>5d}, average reward: {average_reward:>4.1f}, current epsilon: {params.EPSILON:>1.2f}, discount: {params.DISCOUNT:>2.2f},Learning Rate: {params.LEARNING_RATE}'
                    )
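                    # Goal reached: seed the Q-table entry with the terminal reward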
                    table[discrete_state + (action, )] = params.REWARD_END

                discrete_state = new_discrete_state

                episode_reward += reward

            # Track the best episode reward seen across all episodes
            if params.get_bestValue() < episode_reward:
                params.set_bestValue(episode_reward)
                params.set_bestEpisode(episode)
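                # Record which step size produced the new best reward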
                params.set_bestLEARNING_RATE(step_size)
                best_stepsize[(agent, step_size)].append(episode_reward)
                if agent == qLearningAgent:
                    QLearning_stats['episode'].append(episode)
                    QLearning_stats['learning_rate'].append(step_size)
                    QLearning_stats['best_value'].append(episode_reward)
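                    # The three lists stay index-aligned (one entry per new
                    # best value), presumably for later analysis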

                elif agent == sarsaAgent:
                    SARSA_stats['episode'].append(episode)
                    # Assumed to mirror the QLearning_stats bookkeeping above (original truncated)
                    SARSA_stats['learning_rate'].append(step_size)
                    SARSA_stats['best_value'].append(episode_reward)
                # Assumed standard TD form (head was truncated): Q <- (1-lr)*Q + lr*(r + gamma*expected_q)
                table[discrete_state + (action, )] = (
                    (1 - params.LEARNING_RATE) * table[discrete_state + (action, )]
                    + params.LEARNING_RATE * (reward + params.DISCOUNT * expected_q))

            elif new_state[0] >= params.env.goal_position:
                print(f"finish in episode {episode}")
                print(
                    f'Episode: {episode:>5d}, average reward: {average_reward:>4.1f}, current epsilon: {params.EPSILON:>1.2f}, discount: {params.DISCOUNT:>2.2f},Learning Rate: {params.LEARNING_RATE}'
                )
                table[discrete_state + (action, )] = params.REWARD_END

            discrete_state = new_discrete_state

            episode_reward += reward

        # Track the best episode reward seen across all episodes
        if params.get_bestValue() < episode_reward:
            params.set_bestValue(episode_reward)
            params.set_bestEpisode(episode)

        # Epsilon decay: shrink exploration over the decay window
        if params.END_EPSILON_DECAYING >= episode >= params.START_EPSILON_DECAYING:
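            # epsilon_decay_value is presumably the per-episode decrement,
            # e.g. EPSILON / (END_EPSILON_DECAYING - START_EPSILON_DECAYING);
            # set_epsilon is assumed to apply it to the current epsilon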

            params.set_epsilon(params.epsilon_decay_value)

        # Record per-episode stats
        ep_rewards.append(episode_reward)

        # Append the episode's total reward for this agent
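        # totalReward is assumed to map agent class names to lists of episode
        # rewards (e.g. a defaultdict(list) built elsewhere)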
        totalReward[type(agent).__name__].append(episode_reward)

        if not episode % params.STATS_EVERY: