def run(self):
    """Drive the game with a DQN agent in an endless loop.

    Builds the agent and the game, feeds the agent one preprocessed
    bootstrap frame, then repeatedly asks the agent for an action, steps
    the game with it and hands the resulting transition back to the
    agent. Whenever a step reports the episode as finished, the score and
    episode values returned by the game are printed.

    The loop has no exit condition, so this method does not return.
    """
    # NOTE(review): positional hyper-parameters; their meaning is defined by
    # the DQN constructor, which is not visible here.
    dqn_agent = DQN(0.99, 0, 1, 0.001, 50000, 32, 4)
    g = game.gameState()

    # One-hot vector over four entries used for the bootstrap step
    # (presumably the "do nothing" action -- confirm against the game).
    action_0 = np.array([1, 0, 0, 0])
    screen_0, _, _ = g.frameStep(action_0)

    # Resize to 84x84, convert to grayscale, then binarise: every pixel
    # brighter than 1 becomes 255, everything else 0.
    screen_0 = cv2.cvtColor(cv2.resize(screen_0, (84, 84)), cv2.COLOR_BGR2GRAY)
    _, screen_0 = cv2.threshold(screen_0, 1, 255, cv2.THRESH_BINARY)
    dqn_agent.init_state(screen_0)

    while True:
        # Get the next action and advance the game by one frame.
        action = dqn_agent.get_action()
        screen_1, r, done = g.frameStep(action)
        screen_1 = self.screen_handle(screen_1)

        # The values returned by add_replay are not used in this variant.
        dqn_agent.add_replay(screen_1, action, r, done)

        if done:
            score, episode = g.ret_score()
            print(score, episode)
def run(self):
    """Drive the game with a DQN agent in an endless loop, logging progress.

    Builds the agent and the game, feeds the agent one preprocessed
    bootstrap frame, then repeatedly asks the agent for an action, steps
    the game with it and hands the resulting transition back to the
    agent. Every iteration prints the two values returned by
    ``addReplay``; when a step reports the episode as finished, the
    episode and score values returned by the game are printed as well.

    The loop has no exit condition, so this method does not return.
    """
    # First argument is the discount factor (0.99).
    # NOTE(review): the remaining positional hyper-parameters are defined by
    # the DQN constructor, which is not visible here.
    ag = DQN(0.99, 0, 1, 0.001, 50000, 32, 4)
    g = game.gameState()

    # One-hot vector over four entries used for the bootstrap step
    # (presumably the "do nothing" action -- confirm against the game).
    a_0 = np.array([1, 0, 0, 0])
    s_0, _, _ = g.frameStep(a_0)

    # Resize to 84x84, convert to grayscale, then binarise: every pixel
    # brighter than 1 becomes 255, everything else 0.
    s_0 = cv2.cvtColor(cv2.resize(s_0, (84, 84)), cv2.COLOR_BGR2GRAY)
    _, s_0 = cv2.threshold(s_0, 1, 255, cv2.THRESH_BINARY)
    ag.initState(s_0)

    while True:
        a = ag.getAction()
        s_t1, r, done = g.frameStep(a)
        s_t1 = self.screen_handle(s_t1)

        # ts is printed under the label "Epoch" and qv under "Q-Value";
        # presumably a step counter and a Q estimate -- confirm in DQN.
        ts, qv = ag.addReplay(s_t1, a, r, done)

        if done:
            sc, ep = g.retScore()
            print("Epoch: {} Q-Value: {:.3f} Episode: {} Score: {}".format(ts, qv, ep, sc))
        else:
            print("Epoch: {} Q-Value: {:.3f}".format(ts, qv))
def run(self):
    """Drive the game with a DQN agent in an endless loop, logging CSV-like rows.

    Builds the game, preprocesses one bootstrap frame, creates the agent
    with its default arguments and initialises it with that frame. Each
    iteration then asks the agent for an action, steps the game, stores
    the transition, runs one ``learn()`` call and prints a comma-separated
    row: ``ts , qv , ep , sc`` when the step reports the episode as
    finished, otherwise ``ts , qv ,,``.

    The loop has no exit condition, so this method does not return.
    """
    g = game.gameState()

    # One-hot vector over four entries used for the bootstrap step
    # (presumably the "do nothing" action -- confirm against the game).
    a_0 = np.array([1, 0, 0, 0])
    s_0, _, _ = g.frameStep(a_0)

    # Resize to 84x84, convert to grayscale, then binarise: every pixel
    # brighter than 1 becomes 255, everything else 0.
    s_0 = cv2.cvtColor(cv2.resize(s_0, (84, 84)), cv2.COLOR_BGR2GRAY)
    _, s_0 = cv2.threshold(s_0, 1, 255, cv2.THRESH_BINARY)

    ag = DQN()
    # The return value of initState is not used by this method.
    ag.initState(s_0)

    while True:
        a = ag.get_action()
        s1, r, done = g.frameStep(a)
        s1 = self.screen_handle(s1)
        # NOTE(review): looks like a leftover debug print of the processed
        # frame's shape; kept so the runtime output is unchanged.
        print(s1.shape)

        # ts and qv are only printed; presumably a step counter and a
        # Q estimate -- confirm in DQN.
        ts = ag.store_transition(s1, a, r, done)
        qv = ag.learn()

        if done:
            sc, ep = g.retScore()
            print(ts, ",", qv, ",", ep, ",", sc)
        else:
            print(ts, ",", qv, ",,")