Ejemplo n.º 1
0
 def run(self):
     """Play the game with a DQN agent in a loop that never exits.

     One initial step is taken with the fixed action ``[1, 0, 0, 0]``; the
     screen it returns is resized to 84x84, converted to grayscale,
     binarized, and handed to the agent via ``init_state``. Every later
     step asks the agent for an action, applies it to the game, passes the
     new screen through ``self.screen_handle`` and forwards the result to
     the agent's ``add_replay``. Whenever a step reports ``done``, the
     values returned by ``ret_score`` are printed.

     There is no ``break``/``return``: the method only ends if something
     raises.
     """
     # initialize
     # NOTE(review): the meaning of the positional DQN arguments is not
     # visible from here -- confirm against the DQN constructor.
     agent = DQN(0.99, 0, 1, 0.001, 50000, 32, 4)
     env = game.gameState()

     # Bootstrap: a single step whose reward and terminal flag are ignored.
     first_action = np.array([1, 0, 0, 0])
     first_frame, _first_reward, _first_done = env.frameStep(first_action)

     # 84x84 grayscale, then binarize: pixels above 1 become 255, rest 0.
     resized = cv2.resize(first_frame, (84, 84))
     gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
     _thresh, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
     agent.init_state(binary)

     while True:
         # get action and update the game status
         action = agent.get_action()
         frame, reward, done = env.frameStep(action)
         frame = self.screen_handle(frame)

         # add_replay returns a pair; neither element is used here.
         _step, _q_value = agent.add_replay(frame, action, reward, done)

         # Nothing to report until the step signals the end of an episode.
         if not (done == True):
             continue
         score, episode = env.ret_score()
         print(score, episode)
 def run(self):
     """Run the DQN agent against the game forever, logging every step.

     The first screen is obtained by stepping the game once with the fixed
     action ``[1, 0, 0, 0]``; it is resized to 84x84, converted to
     grayscale, binarized and passed to the agent's ``initState``. Each
     loop iteration then takes the agent's action, steps the game, runs
     the new screen through ``self.screen_handle`` and passes the result
     to ``addReplay``.

     One line is printed per step. The first value returned by
     ``addReplay`` is printed under the label "Epoch" and the second under
     "Q-Value"; when the step reports ``done`` the line also carries the
     episode and score returned by ``retScore``.

     The loop has no exit condition.
     """
     # initialize
     # discount factor 0.99
     agent = DQN(0.99, 0, 1, 0.001, 50000, 32, 4)
     env = game.gameState()

     # Bootstrap step; its reward and terminal flag are not used.
     start_action = np.array([1, 0, 0, 0])
     start_frame, _start_reward, _start_done = env.frameStep(start_action)

     # 84x84 grayscale, then binarize: pixels above 1 become 255, rest 0.
     small = cv2.resize(start_frame, (84, 84))
     gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
     _thresh, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
     agent.initState(binary)

     while True:
         action = agent.getAction()
         next_frame, reward, done = env.frameStep(action)
         next_frame = self.screen_handle(next_frame)
         step, q_value = agent.addReplay(next_frame, action, reward, done)

         # Build the log line first, then emit it in one place.
         if done == True:
             score, episode = env.retScore()
             line = "Epoch: {}  Q-Value: {:.3f}  Episode: {}  Score: {}".format(
                 step, q_value, episode, score)
         else:
             line = "Epoch: {}  Q-Value: {:.3f}".format(step, q_value)
         print(line)
Ejemplo n.º 3
0
 def run(self):
     """Run a default-constructed DQN agent against the game forever.

     The game is stepped once with the fixed action ``[1, 0, 0, 0]``; the
     returned screen is resized to 84x84, converted to grayscale and
     binarized. Only then is the agent created (``DQN()`` with no
     arguments) and given that screen through ``initState``.

     Each loop iteration takes the agent's action, steps the game, runs
     the screen through ``self.screen_handle``, prints the processed
     screen's ``shape``, stores the transition with ``store_transition``
     and calls ``learn``. A comma-separated record is printed per step:
     ``ts , qv , ep , sc`` when the step reports ``done`` (episode and
     score come from ``retScore``), otherwise ``ts , qv ,,``.

     The loop has no exit condition.
     """
     # initialize
     env = game.gameState()

     # Bootstrap step; its reward and terminal flag are not used.
     start_action = np.array([1, 0, 0, 0])
     start_frame, _start_reward, _start_done = env.frameStep(start_action)

     # 84x84 grayscale, then binarize: pixels above 1 become 255, rest 0.
     small = cv2.resize(start_frame, (84, 84))
     gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
     _thresh, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

     # The agent is built after the first frame has been prepared.
     agent = DQN()
     # The value returned by initState is bound but never read below.
     _initial_state = agent.initState(binary)

     while True:
         action = agent.get_action()
         next_frame, reward, done = env.frameStep(action)
         next_frame = self.screen_handle(next_frame)
         print(next_frame.shape)

         step = agent.store_transition(next_frame, action, reward, done)
         q_value = agent.learn()

         # Ordinary step: short record with empty episode/score columns.
         if not (done == True):
             print(step, ",", q_value, ",,")
             continue

         # Finished episode: append the episode and score columns.
         score, episode = env.retScore()
         print(step, ",", q_value, ",", episode, ",", score)