def simple_example():
    grid = Gridworld.default_grid()
    print('Rewards:')
    print_values(grid.rewards, grid)
    # A fixed deterministic policy: each non-terminal state maps to one action.
    policy = {
        (2, 0): 'U',
        (1, 0): 'U',
        (0, 0): 'R',
        (0, 1): 'R',
        (0, 2): 'R',
        (1, 2): 'R',
        (2, 1): 'R',
        (2, 2): 'R',
        (2, 3): 'U',
    }
    # Estimate V(s) by averaging first-visit returns over 100 sampled episodes.
    values = first_visit_monte_carlo(grid, 100, policy)
    print('Values:')
    print_values(values, grid)
    print('Policy:')
    print_policy(policy, grid)
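# first_visit_monte_carlo is called but not defined above. The following is a
# minimal sketch of what it could look like. The Gridworld API it assumes
# (set_state, current_state, move returning a reward, game_over), the
# play_episode helper, the (2, 0) start state, and GAMMA = 0.9 are all
# assumptions, not the original implementation.

GAMMA = 0.9

def play_episode(grid, policy, start=(2, 0)):
    # Roll out one episode under the fixed policy, recording (state, reward).
    grid.set_state(start)
    s = grid.current_state()
    states_and_rewards = [(s, 0)]  # no reward for arriving at the start state
    while not grid.game_over():
        r = grid.move(policy[s])
        s = grid.current_state()
        states_and_rewards.append((s, r))
    return states_and_rewards

def first_visit_monte_carlo(grid, num_episodes, policy):
    returns = {}
    for _ in range(num_episodes):
        states_and_rewards = play_episode(grid, policy)
        # Sweep backwards so G accumulates the discounted return at each step.
        G = 0
        states_and_returns = []
        for s, r in reversed(states_and_rewards):
            states_and_returns.append((s, G))
            G = r + GAMMA * G
        states_and_returns.reverse()
        # First-visit filter: only the first occurrence of a state counts.
        seen = set()
        for s, G in states_and_returns:
            if s not in seen:
                returns.setdefault(s, []).append(G)
                seen.add(s)
    # V(s) is the sample mean of the first-visit returns observed for s.
    return {s: sum(gs) / len(gs) for s, gs in returns.items()}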
def simple_example():
    grid = Gridworld.default_grid()
    print('Rewards:')
    print_values(grid.rewards, grid)
    # The same fixed deterministic policy as in the Monte Carlo example.
    policy = {
        (2, 0): 'U',
        (1, 0): 'U',
        (0, 0): 'R',
        (0, 1): 'R',
        (0, 2): 'R',
        (1, 2): 'R',
        (2, 1): 'R',
        (2, 2): 'R',
        (2, 3): 'U',
    }
    # Estimate V(s) with TD(0), bootstrapping from the next state's value.
    values = td_zero(grid, policy)
    print('Values:')
    print_values(values, grid)
    print('Policy:')
    print_policy(policy, grid)
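# td_zero is likewise undefined above; here is a minimal sketch under the same
# assumed Gridworld API. ALPHA, the episode count, the (2, 0) start state, and
# the all_states() method are illustrative assumptions.

ALPHA = 0.1

def td_zero(grid, policy, num_episodes=1000):
    # TD(0) prediction: after each step, nudge V(s) toward the one-step
    # bootstrapped target r + GAMMA * V(s').
    values = {s: 0 for s in grid.all_states()}
    for _ in range(num_episodes):
        grid.set_state((2, 0))
        s = grid.current_state()
        while not grid.game_over():
            r = grid.move(policy[s])
            s_next = grid.current_state()
            values[s] += ALPHA * (r + GAMMA * values[s_next] - values[s])
            s = s_next
    return values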
def mc_prediction():
    grid = Gridworld.default_grid()
    print('Rewards:')
    print_values(grid.rewards, grid)
    # An alternative fixed policy for the same grid.
    policy = {
        (2, 0): 'U',
        (1, 0): 'U',
        (0, 0): 'R',
        (0, 1): 'R',
        (0, 2): 'R',
        (1, 2): 'U',
        (2, 1): 'L',
        (2, 2): 'U',
        (2, 3): 'L',
    }
    # Approximate V(s) with a parametric model; deltas records the per-episode
    # parameter change so convergence can be inspected on the plot.
    values, deltas = approx_monte_carlo(grid, policy)
    plt.plot(deltas)
    plt.show()
    print('Values:')
    print_values(values, grid)
    print('Policy:')
    print_policy(policy, grid)
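# approx_monte_carlo is not defined above either. Judging by the deltas it
# returns for plotting, it plausibly does Monte Carlo prediction with a linear
# function approximator; the sketch below reuses play_episode and GAMMA from
# the Monte Carlo sketch earlier. The feature map, step-size schedule, episode
# count, and the grid.actions dict of non-terminal states are hypothetical.

import numpy as np

def state_features(s):
    # Hypothetical features for a 3 x 4 grid: centered row, centered column,
    # their product, and a bias term.
    return np.array([s[0] - 1, s[1] - 1.5, s[0] * s[1] - 3, 1])

def approx_monte_carlo(grid, policy, num_episodes=2000):
    theta = np.random.randn(4) / 2
    deltas = []
    for t in range(num_episodes):
        alpha = 0.001 / (1 + t / 100)  # slowly decaying step size
        states_and_rewards = play_episode(grid, policy)
        # Backward sweep to get the return G at every step, as in the
        # tabular sketch.
        G = 0
        states_and_returns = []
        for s, r in reversed(states_and_rewards):
            states_and_returns.append((s, G))
            G = r + GAMMA * G
        states_and_returns.reverse()
        biggest_change = 0
        seen = set()
        for s, G in states_and_returns:
            if s in seen:
                continue
            seen.add(s)
            # Semi-gradient update: move theta so theta.dot(x) tracks G.
            x = state_features(s)
            old_theta = theta.copy()
            theta = theta + alpha * (G - theta.dot(x)) * x
            biggest_change = max(biggest_change, np.abs(old_theta - theta).sum())
        deltas.append(biggest_change)
    # Read the fitted values back out for the non-terminal states.
    values = {s: theta.dot(state_features(s)) for s in grid.actions.keys()}
    return values, deltas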
def simple_example():
    grid = Gridworld.default_grid()
    # Evaluate the uniformly random policy with gamma = 1 (no discounting).
    values_uniform = policy_evaluation(grid, 1)
    print('Values for uniformly random actions:')
    print_values(values_uniform, grid)
    print('\n\n')
    fixed_policy = {
        (2, 0): 'U',
        (1, 0): 'U',
        (0, 0): 'R',
        (0, 1): 'R',
        (0, 2): 'R',
        (1, 2): 'R',
        (2, 1): 'R',
        (2, 2): 'R',
        (2, 3): 'U',
    }
    print_policy(fixed_policy, grid)
    # Evaluate the fixed policy with discounting (gamma = 0.9).
    fixed_values = policy_evaluation(grid, 0.9, fixed_policy)
    print('Values for fixed policy:')
    print_values(fixed_values, grid)
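# policy_evaluation must accept an optional policy to satisfy both calls
# above. The sketch below does standard iterative policy evaluation
# (repeated Bellman-expectation sweeps until the values stop changing);
# the SMALL_ENOUGH threshold and the assumption that grid.move is
# deterministic are mine, not the original code's.

SMALL_ENOUGH = 1e-3

def policy_evaluation(grid, gamma, policy=None):
    values = {s: 0 for s in grid.all_states()}
    while True:
        biggest_change = 0
        for s in grid.all_states():
            if s not in grid.actions:
                continue  # terminal states keep V = 0
            old_v = values[s]
            if policy is None:
                # Uniformly random policy: average over the available actions.
                actions = grid.actions[s]
                p = 1.0 / len(actions)
            else:
                actions = [policy[s]]
                p = 1.0
            new_v = 0
            for a in actions:
                grid.set_state(s)
                r = grid.move(a)
                new_v += p * (r + gamma * values[grid.current_state()])
            values[s] = new_v
            biggest_change = max(biggest_change, abs(old_v - new_v))
        if biggest_change < SMALL_ENOUGH:
            break
    return values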