def __init__(self, reward_values=None, reward_discount=0.99, pipe_gap=100,
             display_screen=True, fps=30, force_fps=True):
    if reward_values is None:
        reward_values = {}
    # Wrap the FlappyBird game in a PLE instance that handles rendering and timing
    self.game = PLE(FlappyBird(pipe_gap=pipe_gap), reward_values=reward_values,
                    fps=fps, force_fps=force_fps, display_screen=display_screen)
    self.game.init()
    self.actions = self.game.getActionSet()
    self.reward_discount = reward_discount

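# A minimal sketch of the reset/step methods that typically accompany the
# constructor above, assuming self.actions holds the PLE action set ("flap"
# and None); the method names and return shapes here are assumptions, not
# part of the original snippet.
def reset(self):
    self.game.reset_game()
    return self.game.getGameState()

def step(self, action_index):
    reward = self.game.act(self.actions[action_index])
    done = self.game.game_over()
    return self.game.getGameState(), reward, done
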
import numpy as np
from gym import Env, spaces
from ple import PLE
from ple.games.flappybird import FlappyBird


class FlappyBirdWrapper(Env):
    # Pass display_screen=True to render the game window
    def __init__(self, **kwargs):
        self.game = FlappyBird()
        self.p = PLE(self.game, **kwargs)
        self.action_set = self.p.getActionSet()
        # 3 input features: see self._get_obs
        self.observation_space = spaces.Discrete(3)
        # 2 output actions: flap or do nothing
        self.action_space = spaces.Discrete(2)

    def _get_obs(self):
        # Query the raw game state
        state = self.game.getGameState()
        # Horizontal distance from the bird to the next pipe pair
        dist_to_pipe_horz = state["next_pipe_dist_to_player"]
        # Vertical distance from the bird to the top of the gap in the next pipe pair
        dist_to_pipe_bottom = state["player_y"] - state["next_pipe_top_y"]
        # The bird's vertical velocity
        velocity = state["player_vel"]
        # Pack these features into a single array
        return np.array([dist_to_pipe_horz, dist_to_pipe_bottom, velocity])

    def reset(self):
        self.p.reset_game()
        return self._get_obs()

    def step(self, action):
        reward = self.p.act(self.action_set[action])
        obs = self._get_obs()
        done = self.p.game_over()
        return obs, reward, done, dict()

    def seed(self, *args, **kwargs):
        pass

    def render(self, *args, **kwargs):
        pass

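# A minimal usage sketch for the wrapper above: run one random-action episode.
# It relies only on the class as defined; the loop itself is illustrative and
# not part of the original snippet.
if __name__ == "__main__":
    env = FlappyBirdWrapper(display_screen=True, fps=30, force_fps=True)
    obs = env.reset()
    done = False
    total_reward = 0.0
    while not done:
        action = env.action_space.sample()  # random policy for demonstration
        obs, reward, done, info = env.step(action)
        total_reward += reward
    print("episode reward:", total_reward)
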
# Tail of DQNAgent's experience-replay step: fit toward the TD target, then
# anneal the exploration rate
target_f = self.model.predict(state)
target_f[0][action] = target
self.model.fit(state, target_f, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
    self.epsilon *= self.epsilon_decay

def load(self, name):
    self.model.load_weights(name)

def save(self, name):
    self.model.save_weights(name)


if __name__ == "__main__":
    game = FlappyBird()
    game.allowed_fps = 30
    p = PLE(game, fps=30, display_screen=True)
    agent = DQNAgent(8, 2)
    # Resume from saved weights if present, and persist them again on exit
    if os.path.exists('memory.h5py'):
        agent.load('memory.h5py')
    atexit.register(agent.save, 'memory.h5py')
    p.init()
    reward = 0.0
    max_score = 0
    current_score = 0
    while True:
        state = parse_game_state(game.getGameState())
        for frame in range(1000):
            # Greedy action: the output with the highest predicted Q-value
            action = np.argmax(agent.model.predict(state)[0])

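# parse_game_state is referenced above but not shown in this fragment. A
# plausible sketch, assuming it flattens PLE's 8-key FlappyBird state dict
# into the 1x8 array that agent.model.predict expects; the key order and the
# reshape are assumptions.
import numpy as np

def parse_game_state(game_state):
    keys = [
        'player_y', 'player_vel',
        'next_pipe_dist_to_player', 'next_pipe_top_y', 'next_pipe_bottom_y',
        'next_next_pipe_dist_to_player', 'next_next_pipe_top_y',
        'next_next_pipe_bottom_y',
    ]
    return np.array([game_state[k] for k in keys]).reshape(1, -1)
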
# Tail of an observation-preprocessing routine: express both pipes' heights
# relative to the bird's current y position
pipe_1_top_y = list[3]
pipe_1_bottom_y = list[4]
list[3] = pipe_1_top_y - y_position
list[4] = pipe_1_bottom_y - y_position
pipe_2_top_y = list[6]
pipe_2_bottom_y = list[7]
list[6] = pipe_2_top_y - y_position
list[7] = pipe_2_bottom_y - y_position
if len(list) == 8:
    out = np.array([list])
    return out


game = FlappyBird()
p = PLE(game, fps=30, display_screen=False, force_fps=False)
p.init()
scores = []
length = []
agent = DQL.deep_learner(8, 2, p.getActionSet())
nb_games = 0
nb_max = 1000
# Print each layer's configuration as a sanity check on the network
for layer in agent.model.layers:
    print(layer.get_config())
step = 0
reward = 0

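# The fragment above is the tail of a preprocessing function whose head is not
# shown. A self-contained sketch of the whole idea, assuming the 8-key state
# dict used elsewhere in these snippets; the function name, key order, and
# index positions are assumptions.
import numpy as np

def preprocess_obs(obs):
    features = [obs[key] for key in obs]  # 8 raw features from PLE
    y_position = obs['player_y']
    # Make pipe heights relative to the bird so the input is translation-invariant
    features[3] -= y_position
    features[4] -= y_position
    features[6] -= y_position
    features[7] -= y_position
    return np.array([features])           # shape (1, 8) for model.predict
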
# Body of the agent's test routine: play one game greedily and log the return
self.is_test = True
self.p.reset_game()
total_reward = 0
while not self.p.game_over():
    action = self.select_action(self.state())
    total_reward += self.step(action)
logger.info("Total-Reward: %s" % total_reward)


def seed_torch(seed_):
    torch.manual_seed(seed_)
    if torch.backends.cudnn.enabled:
        # Disable non-deterministic cuDNN kernels for reproducible runs
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True


if __name__ == '__main__':
    hyper = HYPER_PARAMS.copy()
    hyper['obs_dim'] = 8      # PLE's FlappyBird state has 8 features
    hyper['action_dim'] = 2   # flap or do nothing
    hyper['epoch_log'] = 1000
    hyper['epochs'] = 1000000
    game = FlappyBird()
    agent = Agent(hyper, game)
    agent.train()
    agent.test()

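# seed_torch above only seeds PyTorch. A fuller seeding sketch, assuming the
# usual trio of RNGs matters for reproducibility; the helper name seed_all is
# an assumption, not part of the original.
import random
import numpy as np
import torch

def seed_all(seed_):
    random.seed(seed_)
    np.random.seed(seed_)
    torch.manual_seed(seed_)
    if torch.backends.cudnn.enabled:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
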
def act(self, p, action):
    """
    Execute an action.
    :param p: the PLE instance used to send commands to the game
    :param action: the action index
    :return: the reward
    """
    r = p.act(self.action_set[action])
    return r


if __name__ == "__main__":
    # Number of training episodes
    episodes = 20000
    # Instantiate the game object
    game = FlappyBird()
    # PLE wraps the game and provides an interface for controlling it
    p = PLE(game, fps=30, display_screen=True)
    # Initialize the environment
    p.init()
    # Instantiate the agent, passing in the action set
    agent = Agent(p.getActionSet())
    for episode in range(episodes):
        # Reset the game
        p.reset_game()
        # Build the state representation
        state = agent.get_state(game.getGameState())
        while True:
            # Pick the best action

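# The training loop above is truncated at "pick the best action". A sketch of
# the inner-loop body it is leading into, assuming a tabular Q-learning agent
# with get_action/update methods; those method names and the update form are
# assumptions, not part of the original snippet.
while True:
    action = agent.get_action(state)                 # epsilon-greedy choice
    reward = agent.act(p, action)                    # the act() defined above
    next_state = agent.get_state(game.getGameState())
    agent.update(state, action, reward, next_state)  # Q-value update
    state = next_state
    if p.game_over():
        break
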
import matplotlib.pyplot as plt
import numpy as np
import h5py

import rl.deep_Q_learning as DQL
from ple import PLE
from ple.games.flappybird import FlappyBird


def process_obs(obs):
    # Flatten the 8-key state dict into a 1-D feature array
    features = []
    for item in obs:
        features.append(obs[item])
    if len(features) == 8:
        return np.array(features)


game = FlappyBird()
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()
scores = []
agent = DQL.deep_learner(1, 2, p.getActionSet())
# Evaluation settings: learning rate kept small and exploration effectively off
agent.learning_rate = 0.001
agent.epsilon = 0.001
agent.epsilon_decay = 1.0
agent.epsilon_min = 0.00001
# agent.load_model_json()
agent.model.load_weights('my_model.h5')
nb_games = 0
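
# A sketch of the evaluation loop this setup is preparing for, assuming the
# deep_learner exposes its Keras model as agent.model and that greedy play is
# intended; the loop body and the game count are illustrative, not part of
# the original snippet.
while nb_games < 10:
    p.reset_game()
    while not p.game_over():
        state = process_obs(game.getGameState()).reshape(1, -1)
        q_values = agent.model.predict(state)[0]
        p.act(p.getActionSet()[int(np.argmax(q_values))])
    scores.append(p.score())
    nb_games += 1
print("mean score over", nb_games, "games:", np.mean(scores))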