Ejemplo n.º 1
0
def test_value_iteration():
    """Regression test for ``value_iteration`` on a 9x12 grid world.

    Builds the world from the parameters below, solves it with
    ``value_iteration(model, maxiter=100)`` and compares the result with the
    reference stored in ``test_value_iteration.npy`` (column 0 is used as the
    expected value function, column 1 as the expected policy).
    """
    # load the test data
    vp = np.load("../data/test_data/test_value_iteration.npy")
    test_value = vp[:, 0].reshape(-1, 1)
    test_pi = vp[:, 1].reshape(-1, 1)

    # specify world parameters
    num_cols = 12
    num_rows = 9
    obstructions = np.array([[8, 6], [7, 6], [6, 6], [5, 6], [4, 6], [3, 6],
                             [3, 7], [3, 8], [3, 9]])
    bad_states = np.array([[2, 1]])
    start_state = np.array([[0, 1]])
    goal_state = np.array([[7, 8]])

    # create model
    gw = GridWorld(num_rows=num_rows,
                   num_cols=num_cols,
                   start_state=start_state,
                   goal_states=goal_state)
    gw.add_obstructions(obstructed_states=obstructions, bad_states=bad_states)
    gw.add_rewards(step_reward=-1, goal_reward=10, bad_state_reward=-6)
    gw.add_transition_probability(p_good_transition=0.7, bias=0.5)
    gw.add_discount(discount=0.9)
    model = gw.create_gridworld()

    # solve with value iteration
    value_function, pi = value_iteration(model, maxiter=100)

    # test value iteration outputs
    # The value function is the output of a numerical solve, so compare it
    # within numpy's default tolerance rather than bit-for-bit: exact equality
    # can break on last-bit rounding differences between platforms or library
    # versions even when the algorithm is correct.
    assert np.allclose(value_function, test_value)
    # The policy is still compared exactly against the reference.
    assert np.all(pi == test_pi)
Ejemplo n.º 2
0
def test_gridworld():
    """Check the generated 9x12 grid-world model against ``gridworld.mat``.

    The reward vector ``model.R`` and slices 0-3 along the third axis of
    ``model.P`` must match the ``'R'`` and ``'P'`` entries of the stored
    reference model exactly.
    """
    # reference model from the .mat fixture
    reference = loadmat('../data/test_data/gridworld.mat')['model']

    # world layout: a run of cells [8..3, 6] followed by cells [3, 7..9]
    n_rows, n_cols = 9, 12
    blocked = np.array([[r, 6] for r in range(8, 2, -1)] +
                       [[3, c] for c in range(7, 10)])
    hazards = np.array([[2, 1]])
    origin = np.array([[0, 1]])
    target = np.array([[7, 8]])

    # build the model step by step
    world = GridWorld(num_rows=n_rows,
                      num_cols=n_cols,
                      start_state=origin,
                      goal_states=target)
    world.add_obstructions(obstructed_states=blocked, bad_states=hazards)
    world.add_rewards(step_reward=-1, goal_reward=10, bad_state_reward=-6)
    world.add_transition_probability(p_good_transition=0.7, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # rewards: first column of the reference 'R', reshaped to a column vector
    rewards_match = model.R == reference['R'][0][0][:, 0].reshape(-1, 1)
    assert np.all(rewards_match)

    # transitions: compare each of the four slices of the third axis in turn
    # (presumably one slice per action -- confirm against GridWorld)
    for idx in range(4):
        slice_matches = (
            model.P[:, :, idx] == reference['P'][0][0][:, :, idx])
        assert np.all(slice_matches)
Ejemplo n.º 3
0
def test_sarsa():
    """Regression test for ``sarsa`` on a 4x4 grid world.

    Seeds numpy's global random generator, builds the world from the
    parameters below, runs ``sarsa`` and compares the returned ``q`` and
    ``pi`` with the reference stored in ``test_sarsa.npy`` (column 0 is used
    as the expected q values, column 1 as the expected policy).
    """
    # set seed (presumably sarsa draws from numpy's global RNG, so this keeps
    # the run comparable with the stored reference)
    np.random.seed(1)
    # load the test data
    vp = np.load("../data/test_data/test_sarsa.npy")
    test_q = vp[:, 0].reshape(-1, 1)
    test_pi = vp[:, 1].reshape(-1, 1)

    # specify world parameters
    num_cols = 4
    num_rows = 4
    obstacles = np.array([[1, 1], [2, 1], [1, 2]])
    bad_states = np.array([[3, 0]])
    start_state = np.array([[0, 0]])
    goal_state = np.array([[3, 3]])

    # create world
    # Pass the dimensions by keyword, as the other tests do: the positional
    # form relied on both being 4 and would silently swap rows and columns
    # for a non-square world.
    gw = GridWorld(num_rows=num_rows,
                   num_cols=num_cols,
                   start_state=start_state,
                   goal_states=goal_state)
    gw.add_obstructions(obstructed_states=obstacles, bad_states=bad_states)
    gw.add_rewards(step_reward=-1, goal_reward=10, bad_state_reward=-6)
    gw.add_transition_probability(p_good_transition=0.8, bias=0.5)
    gw.add_discount(discount=0.9)
    model = gw.create_gridworld()

    # solve with sarsa
    q, pi, _ = sarsa(model, alpha=0.1, epsilon=0.2, maxiter=100, maxeps=1000)

    # test sarsa outputs
    # q is the result of many floating-point updates, so compare it within
    # numpy's default tolerance instead of bit-for-bit; exact equality can
    # break on last-bit rounding differences between environments.
    assert np.allclose(q, test_q)
    # The policy is still compared exactly against the reference.
    assert np.all(pi == test_pi)
Ejemplo n.º 4
0
def test_cliffworld():
    """Check the generated 5x10 cliff-world model against ``cliffworld.mat``.

    The reward vector ``model.R`` and slices 0-3 along the third axis of
    ``model.P`` must match the ``'R'`` and ``'P'`` entries of the stored
    reference model exactly.
    """
    # reference model from the .mat fixture
    reference = loadmat('../data/test_data/cliffworld.mat')['model']

    # world layout
    n_rows, n_cols = 5, 10
    # restart cells [4, 1] .. [4, 7]
    restarts = np.array([[4, c] for c in range(1, 8)])
    # obstructed cells [0, 9] .. [4, 9]
    blocked = np.array([[r, 9] for r in range(5)])
    origin = np.array([[4, 0]])
    targets = np.array([[4, 8]])

    # build the model step by step
    world = GridWorld(num_rows=n_rows,
                      num_cols=n_cols,
                      start_state=origin,
                      goal_states=targets)
    world.add_obstructions(obstructed_states=blocked,
                           restart_states=restarts)
    world.add_rewards(step_reward=-1,
                      goal_reward=10,
                      restart_state_reward=-100)
    world.add_transition_probability(p_good_transition=1, bias=0)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # rewards: first column of the reference 'R', reshaped to a column vector
    rewards_match = model.R == reference['R'][0][0][:, 0].reshape(-1, 1)
    assert np.all(rewards_match)

    # transitions: compare each of the four slices of the third axis in turn
    # (presumably one slice per action -- confirm against GridWorld)
    for idx in range(4):
        slice_matches = (
            model.P[:, :, idx] == reference['P'][0][0][:, :, idx])
        assert np.all(slice_matches)
Ejemplo n.º 5
0
def test_smallworld():
    """Check the generated 4x4 small-world model against ``smallworld.mat``.

    The reward vector ``model.R`` and slices 0-3 along the third axis of
    ``model.P`` must match the ``'R'`` and ``'P'`` entries of the stored
    reference model exactly.
    """
    # reference model from the .mat fixture
    reference = loadmat('../data/test_data/smallworld.mat')['model']

    # world layout
    n_rows = n_cols = 4
    blocked = np.array([[1, 1], [2, 1], [1, 2]])
    origin = np.array([[0, 0]])
    target = np.array([[3, 3]])

    # build the model step by step
    world = GridWorld(num_rows=n_rows,
                      num_cols=n_cols,
                      start_state=origin,
                      goal_states=target)
    world.add_obstructions(obstructed_states=blocked)
    world.add_rewards(step_reward=-1, goal_reward=10)
    world.add_transition_probability(p_good_transition=0.8, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # rewards: first column of the reference 'R', reshaped to a column vector
    rewards_match = model.R == reference['R'][0][0][:, 0].reshape(-1, 1)
    assert np.all(rewards_match)

    # transitions: compare each of the four slices of the third axis in turn
    # (presumably one slice per action -- confirm against GridWorld)
    for idx in range(4):
        slice_matches = (
            model.P[:, :, idx] == reference['P'][0][0][:, :, idx])
        assert np.all(slice_matches)
# NOTE(review): num_rows and num_cols are used below but are not defined in
# the visible part of this script; they are expected to be set earlier.

# world layout: each entry is a two-element cell coordinate
# (presumably [row, col] -- confirm against GridWorld)
obstructions = np.array([
    [0, 7], [1, 1], [1, 2], [1, 3], [1, 7], [2, 1], [2, 3],
    [2, 7], [3, 1], [3, 3], [3, 5], [4, 3], [4, 5], [4, 7],
    [5, 3], [5, 7], [5, 9], [6, 3], [6, 9], [7, 1], [7, 6],
    [7, 7], [7, 8], [7, 9], [8, 1], [8, 5], [8, 6],
    [9, 1],
])
bad_states = np.array([[1, 9], [4, 2], [4, 4], [7, 5], [9, 9]])
restart_states = np.array([[3, 7], [8, 2]])
start_state = np.array([[0, 4]])
goal_states = np.array([[0, 9], [2, 2], [8, 7]])

# build the model: layout first, then rewards, transition noise and discount
gw = GridWorld(
    num_rows=num_rows,
    num_cols=num_cols,
    start_state=start_state,
    goal_states=goal_states,
)
gw.add_obstructions(
    obstructed_states=obstructions,
    bad_states=bad_states,
    restart_states=restart_states,
)
gw.add_rewards(
    step_reward=-1,
    goal_reward=10,
    bad_state_reward=-6,
    restart_state_reward=-10,
)
gw.add_transition_probability(p_good_transition=0.7, bias=0.5)
gw.add_discount(discount=0.9)
model = gw.create_gridworld()

# plot the (unsolved) world, passing the image path to plot_gridworld
path = "../doc/imgs/unsolved_gridworld.png"
plot_gridworld(model, title="Test world", path=path)