Example #1
import numpy as np

def random_policy_test(steps=100, verbose=False):
    print("==== Results with Random Policy ====")
    # Config and MountainCar are assumed to be defined in the surrounding module.
    config = Config()
    actions = 3  # number of discrete actions: 0, 1, 2

    config.current_step = 0
    env = MountainCar(config)

    cumulative_reward = 0
    terminations = 0
    steps_per_episode = []

    episode_steps = 0

    for i in range(steps):
        A = np.random.randint(actions)
        old_state = env.get_current_state()
        next_S, R, terminate = env.step(A)
        if verbose:
            print("Old state:", np.round(old_state, 3), "-->", "Action:", A,
                  "-->", "New state:", np.round(next_S, 3))
        cumulative_reward += R
        episode_steps += 1
        if terminate:
            if verbose:
                print("\n## Reset ##\n")
            terminations += 1
            steps_per_episode.append(episode_steps)
            episode_steps = 0
            env.reset()

    # Record the final, possibly unfinished, episode as well.
    if not terminate:
        steps_per_episode.append(episode_steps)

    print("Number of steps per episode:", steps_per_episode)
    print("Number of episodes that reached the end:", terminations)
    average_length = np.average(steps_per_episode)
    print("The average number of steps per episode was:", average_length)
    print("Cumulative reward:", cumulative_reward)
    print("\n\n")
Example #2
import numpy as np

def pumping_action_test(steps=100, verbose=False):
    print("==== Results with Pumping Action Policy ====")
    # Config and MountainCar are assumed to be defined in the surrounding module.
    config = Config()

    config.current_step = 0
    env = MountainCar(config)

    steps_per_episode = []
    return_per_episode = []

    episode_steps = 0
    episode_return = 0
    terminations = 0
    for i in range(steps):
        current_state = env.get_current_state()
        # Pumping policy: accelerate in the direction of the current velocity.
        A = 1 + np.sign(current_state[1])
        old_state = current_state
        next_S, R, terminate = env.step(A)
        if verbose:
            print("Old state:", np.round(old_state, 3), "-->", "Action:", A,
                  "-->", "New state:", np.round(next_S, 3))

        episode_steps += 1
        episode_return += R
        if terminate:
            terminations += 1
            if verbose:
                print("\n## Reset ##\n")
            env.reset()
            steps_per_episode.append(episode_steps)
            return_per_episode.append(episode_return)
            episode_steps = 0
            episode_return = 0

    print("Number of steps per episode:", steps_per_episode)
    print("Number of successful episodes:", terminations)
    print("Return per episode:", return_per_episode)
    print("The average return per episode is:", np.mean(return_per_episode))