def eval_q_net():
    """Play every training map in turn with exploration switched off.

    For each index of ``_MAP_LIST`` a new non-looping maze is created and
    1001 episodes are played using the actions chosen by the agent. The
    agent is never updated and nothing is returned.

    NOTE(review): the original docstring stated that each map is run 5000
    times and that the per-step and per-episode success rates are logged.
    This function runs 1001 episodes per map and performs no logging of its
    own -- presumably the environment does the logging; confirm.

    :return: None
    """
    agent = BrainDQN()
    # Presumably epsilon is the random-action probability -- TODO confirm.
    agent.epsilon = 0

    map_count = len(_MAP_LIST)
    for map_index in range(map_count):
        maze = env.Maze(
            _MAP_LIST, is_show=True, is_loop=False, map_index=map_index)
        maze.effective_epsilon = 1

        for _episode in range(1001):
            state, key_state = maze.reset()
            finished = False
            while not finished:
                maze.render(False)
                action, is_random = agent.getAction(state, key_state)
                step_result = maze.step(action, is_random)
                next_state, next_key_state, _reward, finished = step_result
                # Copy so later environment updates cannot alias our state.
                state = next_state.copy()
                key_state = next_key_state.copy()
def main(is_debug):
    """Run the agent against the maze built from ``_MAP_LIST`` forever.

    Every transition is handed to ``BrainDQN.setPerception``. The outer loop
    has no exit condition, so this function never returns normally.

    :param is_debug: when truthy, the agent's ``epsilon`` is set to 0; the
        flag is also forwarded to ``maze.render``.
    """
    agent = BrainDQN()
    if is_debug:
        agent.epsilon = 0

    maze = env.Maze(_MAP_LIST, is_show=True)

    while True:
        # One pass of this loop is one episode.
        state, key_state = maze.reset()
        finished = False
        while not finished:
            maze.render(is_debug)
            action, is_random = agent.getAction(state, key_state)
            step_result = maze.step(action, is_random)
            next_state, next_key_state, reward, finished = step_result
            agent.setPerception(
                state, key_state,
                next_state, next_key_state,
                action, reward, finished)
            state = next_state.copy()
            key_state = next_key_state.copy()
def eval_q_net():
    """Compare rewards obtained by the agent with the recorded rewards.

    Loads the replay memory stored under ``"eval_data"``. For every stored
    sample the maze is put into the recorded state, the agent chooses an
    action for that state, the maze is stepped, and the difference between
    the obtained reward and the recorded reward is collected. The mean
    difference is logged and all differences are plotted.

    If the replay memory holds no samples, a warning is logged and the
    function returns early instead of failing with ``ZeroDivisionError``
    while computing the mean.

    :return: None
    """
    maze = env.Maze(_MAP1, is_show=True)
    memory = ReplayMemory()
    memory.load("eval_data")
    brain = BrainDQN()
    # No random actions during evaluation.
    brain.epsilon = 0

    r_list = []
    for data in memory.memory:
        # Only the recorded state (index 0) and reward (index 2) are needed;
        # the action is re-chosen by the agent.
        s, r = data[0], data[2]
        action, _ = brain.getAction(s)
        maze.set_observation(s)
        _, r_eval, _ = maze.step(action, False)
        r_list.append(r_eval - r)

    if not r_list:
        # Nothing to average or plot; avoid dividing by zero below.
        logging.warning("eval_data contains no samples; nothing to evaluate")
        return

    logging.info(sum(r_list) / len(r_list))
    plt.plot(np.arange(len(r_list)), r_list)
    plt.ylabel('reward loss')
    plt.xlabel('sample')
    plt.show()
def main():
    """Run the agent against the ``_MAP1`` maze forever.

    Each transition is passed to ``BrainDQN.setPerception``. The outer loop
    has no exit condition, so this function never returns normally.
    """
    agent = BrainDQN()
    maze = env.Maze(_MAP1, is_show=True)

    while True:
        # One pass of this loop is one episode.
        state = maze.reset()
        finished = False
        while not finished:
            maze.render()
            action, is_random = agent.getAction(state)
            next_state, reward, finished = maze.step(action, is_random)
            agent.setPerception(state, next_state, action, reward, finished)
            state = next_state.copy()
def gen_eval_data_manually():
    """Record evaluation samples by driving the maze from the keyboard.

    The digits 1-9 are mapped to the ``env`` movement constants in numeric
    keypad layout (7/8/9 up, 4/5/6 left/stop/right, 1/2/3 down). Each step
    is appended to the replay memory loaded from ``"eval_data"`` as
    ``(observation, action, reward, next_observation, done)``. Entering
    ``exit`` stops recording and writes the memory back to ``"eval_data"``.

    Unrecognised input is rejected with a message and the prompt is shown
    again; previously a mistyped key raised ``KeyError`` and every sample
    recorded in the session was lost because the dump was never reached.

    :return: None
    """
    maze = env.Maze(_MAP1, is_show=True)
    key_map = {
        "1": env.LEFT_DOWN,
        "2": env.DOWN,
        "3": env.RIGHT_DOWN,
        "4": env.LEFT,
        "5": env.STOP,
        "6": env.RIGHT,
        "7": env.LEFT_UP,
        "8": env.UP,
        "9": env.RIGHT_UP,
    }
    memory = ReplayMemory()
    memory.load("eval_data")

    done = True  # forces a reset before the first prompt
    while True:
        if done:
            observation = maze.reset()
            # Cleared so a rejected key does not trigger a second reset.
            done = False

        s = input('input your path. Enter "exit" to close\n').strip()
        if s == "exit":
            break
        if s not in key_map:
            print('unknown key %r; expected one of %s or "exit"'
                  % (s, ", ".join(sorted(key_map))))
            continue

        action = key_map[s]
        next_observation, r, done = maze.step(action, True)
        memory.push_back(
            (observation.copy(), action, r, next_observation.copy(), done))
        observation = next_observation.copy()
        maze.render(is_sleep=True)

    memory.dump("eval_data")