Ejemplo n.º 1
0
import gym
from mdps import solvers

# Named integer constants for the four directions; presumably FrozenLake's
# action encoding (verify against the installed gym version). They are not
# referenced again in this snippet.
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3

# Look up the registered spec so its constructor keyword arguments can be
# edited before the environment is created.
env_spec = gym.spec('FrozenLake-v0')

# Overwrite the 'is_slippery' keyword stored on the spec.
# NOTE(review): this relies on the private `_kwargs` attribute and on
# gym.make() reading the same spec object -- confirm for the gym version used.
env_spec._kwargs['is_slippery'] = False

env = gym.make('FrozenLake-v0')

env.reset()

# The solver is handed the inner object exposed as `env.env`, not the object
# returned by gym.make(). The parenthesised single-argument print produces the
# same output on Python 2 and also parses on Python 3.
print(solvers.value_iteration(env.env))
Ejemplo n.º 2
0
# Switch the axis display off on all four panels before anything is drawn.
for panel in (four_slip_policy, four_slip_value, eight_value, eight_policy):
    panel.axis('off')

f.suptitle("Final Ice Policies")

# Each entry pairs a lake size with the panel that receives the
# policy-improvement result and the panel that receives the value-iteration
# result. Entries are processed in order: 4x4 first, then 8x8.
for lake_size, improvement_ax, iteration_ax in (
        ('4x4', four_slip_policy, four_slip_value),
        ('8x8', eight_policy, eight_value)):
    _, env = get_env(lake_size, slippery=True)

    # Solve with policy improvement, draw the policy, and record the
    # iteration count in the panel title.
    policy, _, iters = policy_improvement(env, discount_factor=DISCOUNT_FACTOR)
    visualize_ice_policy(env, policy, ax=improvement_ax)
    improvement_ax.set_title("Policy Improvement {} Iters".format(iters))

    # Same environment, solved with value iteration.
    policy, _, iters = value_iteration(env, discount_factor=DISCOUNT_FACTOR)
    visualize_ice_policy(env, policy, ax=iteration_ax)
    iteration_ax.set_title("Value Iteration {} Iters".format(iters))
Ejemplo n.º 3
0
import gym
from mdps import solvers
from experiments import get_env
from mdps.visualize_policy import visualize_solution

# Fetch the environment registered under the key 'taxi'; get_env returns a
# (name, environment) pair.
name, env = get_env('taxi')

# Run value iteration with a 0.95 discount factor. The solver's result is
# unpacked as a 3-tuple, of which only the first element is used below.
solution = solvers.value_iteration(env, discount_factor=0.95)
p, _, _ = solution

# Hand the environment and the first solver output to the visualiser.
visualize_solution(env, p)
Ejemplo n.º 4
0
import gym
from mdps import solvers
from mdps.util import make_random_policy
from mdps.visualize_policy import visualize_ice
# Named integer constants for the four directions; presumably FrozenLake's
# action encoding (verify against the installed gym version). They are not
# referenced again in this snippet.
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3

# NOTE(review): the name and spec below refer to 'Taxi-v2', but the
# environment actually created further down is 'FrozenLake-v0'. Neither
# `environment_name` nor `env_spec` is used in the visible script -- confirm
# which environment was intended before relying on either.
environment_name = 'Taxi-v2'

env_spec = gym.spec('Taxi-v2')
#env_spec._kwargs['is_slippery'] = False

env = gym.make('FrozenLake-v0')

env.reset()

# The visualiser and both solvers receive the inner object exposed as
# `env.env`, not the object returned by gym.make().
visualize_ice(env.env)

# Print the first element of each solver's result, both with a 0.9 discount
# factor. The parenthesised single-argument print produces the same output on
# Python 2 and also parses on Python 3.
print(solvers.value_iteration(env.env, discount_factor=.9)[0])
print(solvers.policy_improvement(env.env, discount_factor=.9)[0])

Ejemplo n.º 5
0
from experiments.get_experiment import get_env
from mdps.solvers import policy_eval, policy_improvement, value_iteration
from mdps.visualize_policy import visualize_ice_policy, visualize_solution
import numpy as np
import matplotlib.pyplot as plt
from mdps import evaluate_solutions

# Fetch the environment registered under the key 'taxi'; get_env returns a
# (name, environment) pair.
name, env = get_env('taxi')

# Solve once with a 0.92 discount factor.
# NOTE(review): elsewhere in this file the same solver's 3-tuple is unpacked
# as (policy, score, iters); the names `rewards` and `scores` used here may
# therefore be misleading -- confirm against mdps.solvers.
pol, rewards, scores = value_iteration(env, discount_factor=.92)

# The parenthesised single-argument print produces the same output on
# Python 2 and also parses on Python 3.
print(env)


def experiment(current_env, eval_func):

    x, scores_expected, num_iters, scores_actual = [], [], [], []
    for i in np.linspace(0.3, .98, 25):
        print i
        policy, score, iters = eval_func(current_env, discount_factor=i)

        # we just grab the score from the expected starting state
        scores_expected.append(score[0])

        if iters > 9999:
            iters = 0

        num_iters.append(iters)
        score_actual = evaluate_solutions(current_env, policy)
        print score_actual
        scores_actual.append(score_actual)