def main():
    """Solve the PuddleWorld MDP with policy iteration and plot the results.

    Builds a linear-feature reward (`PuddleRewardLFA`), a `PWTransition`
    model and a discounted (`0.98`) `PuddleWorldMDP`, solves it with
    `PolicyIteration`, then shows two figures: the world with the greedy
    policy overlaid, and a heat map of the value function.

    Side effects: prints V and pi to stdout and opens matplotlib windows
    (blocks on ``plt.show()``). Returns None.

    NOTE(review): if another ``def main():`` exists later in this file,
    that definition shadows this one — confirm which entry point is meant.
    """
    with PuddleWorld(start=(0.5, 0.1), resolution=0.05) as world:
        # Reward is a linear function approximation over two features;
        # weights [1, -1] reward the goal feature and penalize the puddle.
        R = PuddleRewardLFA(weights=[1, -1], rmax=1.0)
        T = PWTransition()
        g = PuddleWorldMDP(reward=R, transition=T, discount=0.98)

        # Solve the MDP exactly; `res` holds the value function 'V'
        # and the greedy policy 'pi'.
        mdp_planner = PolicyIteration()
        res = mdp_planner.solve(g)
        V = res['V']
        print(V)
        print(res['pi'])

        # Figure 1: the world itself with the computed policy drawn on top.
        fig = plt.figure(figsize=(8, 8))
        ax = fig.gca()
        ax = world.visualize(ax, policy=res['pi'])

        # Figure 2: value function reshaped onto the world grid.
        # `.T` + origin='lower' orient the image to match the world axes.
        plt.figure(figsize=(8, 8))
        plt.imshow(V.reshape(world.shape).T,
                   cmap='viridis', origin='lower',
                   vmin=np.min(V), vmax=np.max(V))
        plt.grid(False)
        plt.title('Value function')
        plt.colorbar(orientation='horizontal')
        plt.show()
def main():
    """Run policy iteration on a 10-state ChainWorld and plot the outcome.

    Constructs a `ChainMDP` (discount 0.98) from `ChainReward` and
    `ChainTransition`, solves it with `PolicyIteration`, prints the
    resulting policy, then shows the world with the policy overlaid and a
    line plot of the value function. Blocks on ``plt.show()``; returns None.
    """
    NUM_STATES = 10
    with ChainWorld(num_states=NUM_STATES) as world:
        reward_model = ChainReward()
        transition_model = ChainTransition()
        chain_mdp = ChainMDP(reward_model, transition_model, discount=0.98)

        solver = PolicyIteration()
        solution = solver.solve(chain_mdp)
        print(solution['pi'])

        # World rendering with the greedy policy drawn on top.
        policy_fig = plt.figure(figsize=(12, 3))
        axes = policy_fig.gca()
        axes = world.visualize(axes)
        axes = world.show_policy(axes, policy=solution['pi'])

        # Separate figure: value of each chain state.
        plt.figure(figsize=(8, 8))
        plt.plot(solution['V'])
        plt.title('Value function')
        plt.show()