# NOTE(review): this chunk arrived collapsed onto a single physical line; it has
# been re-indented for readability only — no tokens changed. It begins in the
# MIDDLE of an evaluation function (its `def`, episode loop, and step loop open
# before this chunk), so the nesting depth of the first lines is a best-effort
# reconstruction — verify against the full file.

            # One environment step: translate the predicted action index into a
            # concrete PLE action, apply it, and re-read the observation as the
            # list of game-state values.
            action = env.getActionSet()[action]
            reward = env.act(action)
            obs = list(env.getGameState().values())
            episode_reward += reward
            if render:
                env.getScreenRGB()  # fetches a frame; presumably drives on-screen display — confirm
            if env.game_over():
                break
        eval_reward.append(episode_reward)  # total reward of the finished episode
    return np.mean(eval_reward)  # mean episode reward over all evaluation runs

# ---------------------------------------------------------------------------
# Script setup: Catcher environment wrapped in PLE, replay memory, model,
# DQN algorithm, and agent. MEMORY_SIZE / GAMMA / LEARNING_RATE / ReplayMemory /
# Model / DQN / Agent are defined elsewhere in the file (not visible here).
# ---------------------------------------------------------------------------
env = Catcher(500, 500)
env = PLE(env, fps=10, display_screen=True, force_fps=False)
act_dim = len(env.getActionSet())  # number of discrete actions
obs_dim = len(env.getGameState())  # size of the flattened game-state dict
rpm = ReplayMemory(MEMORY_SIZE)
model = Model(act_dim=act_dim)
alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
# NOTE(review): e_greed=1e-6 together with e_greed_decrement=0.1 looks like the
# two values were swapped (the common pattern is e_greed=0.1 with a tiny
# decrement such as 1e-6, which keeps some exploration early in training) —
# confirm intent before changing.
agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim, e_greed_decrement=0.1, e_greed=1e-6)
# NOTE(review): the triple-quoted string below appears to comment out the
# replay-memory warm-up loop ("添加经验池" ≈ "fill the experience pool"); its
# closing quotes lie beyond this chunk — confirm against the full file.
"""
#添加经验池
while len(rpm) < MEMORY_WARMUP_SIZE:
    run_episode(env, agent, rpm)
'fruit_y': int 0 - 300 }
Actions: [97, 100, None]
'''
# NOTE(review): this chunk arrived collapsed onto a single physical line; it has
# been re-indented for readability only — no tokens changed. The lines above
# are the TAIL of a state-format docstring whose opening quotes lie before this
# chunk — verify against the full file.

# Script: run a RandomAgent on Catcher, computing a shaped reward from
# consecutive game states. RandomAgent / actionValue / Reward are defined
# elsewhere in the file (not visible here).
game = Catcher(width=256, height=256, init_lives=10)
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()
agent = RandomAgent(p.getActionSet())
nb_frames = 50000
reward = 0.0
print(game.getGameState())
print(p.getActionSet())
for f in range(nb_frames):
    if p.game_over():  #check if the game is over
        p.reset_game()
        # NOTE(review): separator print reconstructed as part of the reset
        # branch (matches the collapsed token order) — confirm placement.
        print("-----------------")
    state0 = game.getGameState()                    # state before acting
    action_index = agent.pickAction(state0)
    action = actionValue(action_index)              # index -> PLE action value
    print(action)
    fruit_reward = p.act(action)                    # raw environment reward
    state1 = game.getGameState()                    # state after acting
    reward = Reward(state0, state1, fruit_reward)   # shaped reward from state delta
'''
State Format: {
    'player_x': int,
    'player_vel': float,
    'fruit_x': int,
    'fruit_y': int
}
Actions: [97, 100, None]
'''
# NOTE(review): this chunk arrived collapsed onto a single physical line; it has
# been re-indented for readability only — no tokens changed.

# Script: run a RandomAgent on Catcher for a fixed number of frames, feeding
# the previous frame's reward back into the action picker. RandomAgent is
# defined elsewhere in the file (not visible here).
game = Catcher(width=256, height=256, init_lives=3)
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()
agent = RandomAgent(p.getActionSet())
nb_frames = 1000
reward = 0.0
print(game.getGameState())
print(p.getActionSet())
for f in range(nb_frames):
    if p.game_over():  #check if the game is over
        p.reset_game()
    state = game.getGameState()
    action = agent.pickAction(state, reward)  # reward from the previous frame
    reward = p.act(action)
# initialize game game = Catcher(width=100, height=100, init_lives=1) p = PLE(game, fps=30, frame_skip=3, num_steps=1, force_fps=False, display_screen=True) p.init() # initialize agent agent = PlayingAgent(p.getActionSet(), model) # run training episodes = 10 max_timestamps = 300 episode_results = [] for episode_index in range(episodes): p.reset_game() for timestamp in range(max_timestamps): observation = game.getGameState() action = agent.pick_action(observation) reward = p.act(action) if p.game_over(): break print('Episode %5d Timestamps: %d' % (episode_index + 1, timestamp + 1))