Example #1
0
from time import sleep
from gridworld import GridWorld

# Build a 3x5 grid world. Coordinates are (row, col): every cell of the last
# row (row 2) is a pit, and the single goal is at the last column of row 1.
# NOTE(review): live_reward is presumably the reward for each non-terminal
# step -- confirm against GridWorld.
gw = GridWorld(
    rows=3,
    cols=5,
    pits=[(2, col) for col in range(5)],
    goals=[(1, 4)],
    live_reward=-0.01,
)

# Alternative runs -- uncomment one of these to use instead of the active
# call at the bottom.
#
# Fixed exploration rate of 50%:
#   gw.q_learning(gamma=0.9, epsilon_0=0.5)
#
# Decreasing exploration (up to 100 episodes):
#   gw.q_learning(gamma=0.9, episodes=100)

# Active run: decreasing exploration, with a plot of the utilities.
gw.q_learning(gamma=0.9, episodes=100, plot=True)