Example #1
0
__author__ = 'dot'

from GridWorld import GridWorld
from QAgent import QAgent
import numpy as np
import matplotlib.pyplot as plt

##
env = GridWorld(size=10)
q_agent = QAgent(env.get_number_of_states(), GridWorld.get_number_of_actions())
res = []
for idx_epoch in range(400):
    s, r, d, info = env.reset()
    print("Reset:st=%d,r=%f,d=%d,%s" % (s, r, d, str(info)))
    for t in range(100):
        # env.render()
        curAction = q_agent.get_action_epsilon_greedy(env.get_state())
        # print("State=%d,Action=%d"%(env.getState(),curAction))
        nxtSt, nxtR, done, info = env.step(curAction)
        # print("nxtSt=%d,nxtR=%f,d=%d,info=%s"%(nxtSt,nxtR,done,str(info)))
        q_agent.update(curAction, nxtSt, nxtR)
        if done:
            print("Episode %d finished after %d time steps" %
                  (idx_epoch, t + 1))
            #print(q_agent.show_q())
            print("=============")
            res.append(t + 1)
            break

print("p1")