예제 #1
0
def simple_example():
    """Demo: evaluate a hand-written policy with first-visit Monte Carlo.

    Builds the default gridworld, prints its rewards, computes values for
    the fixed policy via ``first_visit_monte_carlo`` and prints both the
    resulting values and the policy.
    """
    world = Gridworld.default_grid()

    print('Rewards:')
    print_values(world.rewards, world)

    # State -> action table. States look like (row, col) pairs and the
    # action letters like U(p) / R(ight) — presumably; confirm with Gridworld.
    state_actions = (
        ((2, 0), 'U'),
        ((1, 0), 'U'),
        ((0, 0), 'R'),
        ((0, 1), 'R'),
        ((0, 2), 'R'),
        ((1, 2), 'R'),
        ((2, 1), 'R'),
        ((2, 2), 'R'),
        ((2, 3), 'U'),
    )
    fixed_policy = dict(state_actions)

    # NOTE(review): 100 is presumably the number of sampled episodes — confirm
    # against first_visit_monte_carlo's signature.
    estimates = first_visit_monte_carlo(world, 100, fixed_policy)

    print('Values:')
    print_values(estimates, world)
    print('Policy:')
    print_policy(fixed_policy, world)
예제 #2
0
def simple_example():
    """Demo: evaluate a hand-written policy with ``td_zero``.

    Prints the default gridworld's rewards, then the values ``td_zero``
    returns for the fixed policy, then the policy itself.
    """
    env = Gridworld.default_grid()

    print('Rewards:')
    print_values(env.rewards, env)

    # The policy is assembled from two parallel sequences: the i-th state is
    # paired with the i-th action letter.
    states = [
        (2, 0), (1, 0),
        (0, 0), (0, 1), (0, 2),
        (1, 2),
        (2, 1), (2, 2), (2, 3),
    ]
    actions = ['U', 'U', 'R', 'R', 'R', 'R', 'R', 'R', 'U']
    pi = dict(zip(states, actions))

    state_values = td_zero(env, pi)

    print('Values:')
    print_values(state_values, env)
    print('Policy:')
    print_policy(pi, env)
예제 #3
0
def mc_prediction():
    """Demo: run ``approx_monte_carlo`` on a fixed policy and plot its deltas.

    Prints the default gridworld's rewards, runs ``approx_monte_carlo``,
    plots the second element of its result (``deltas``) with matplotlib,
    and finally prints the returned values and the policy.
    """
    gridworld = Gridworld.default_grid()

    print('Rewards:')
    print_values(gridworld.rewards, gridworld)

    # Hand-written state -> action mapping that is being evaluated.
    target_policy = {}
    for state, action in [
        ((2, 0), 'U'),
        ((1, 0), 'U'),
        ((0, 0), 'R'),
        ((0, 1), 'R'),
        ((0, 2), 'R'),
        ((1, 2), 'U'),
        ((2, 1), 'L'),
        ((2, 2), 'U'),
        ((2, 3), 'L'),
    ]:
        target_policy[state] = action

    estimates, delta_history = approx_monte_carlo(gridworld, target_policy)

    # Show the delta curve before printing the numeric results.
    plt.plot(delta_history)
    plt.show()

    print('Values:')
    print_values(estimates, gridworld)
    print('Policy:')
    print_policy(target_policy, gridworld)
예제 #4
0
def simple_example():
    """Demo: compare ``policy_evaluation`` with and without an explicit policy.

    First evaluates the default gridworld with second argument ``1`` and no
    policy (the printed label calls this "uniformly random actions"), then
    prints a hand-written policy and evaluates it with second argument
    ``0.9``.
    """
    world = Gridworld.default_grid()

    # NOTE(review): the second positional argument (1 here, 0.9 below) is
    # presumably the discount factor — confirm against policy_evaluation.
    uniform_estimates = policy_evaluation(world, 1)
    print('values for uniformly random actions:')
    print_values(uniform_estimates, world)
    print('\n\n')

    # Hand-written deterministic policy, listed as (state, action) pairs.
    fixed_policy = dict([
        ((2, 0), 'U'),
        ((1, 0), 'U'),
        ((0, 0), 'R'),
        ((0, 1), 'R'),
        ((0, 2), 'R'),
        ((1, 2), 'R'),
        ((2, 1), 'R'),
        ((2, 2), 'R'),
        ((2, 3), 'U'),
    ])
    print_policy(fixed_policy, world)

    fixed_estimates = policy_evaluation(world, 0.9, fixed_policy)
    print('Values for fixed policy:')
    print_values(fixed_estimates, world)