# FlappyBird driven by a QLearnerEvolverFlappy agent.
# (process_state and QLearnerEvolverFlappy are defined elsewhere in the project.)
from collections import deque
from random import randint

from ple import PLE
from ple.games.flappybird import FlappyBird

game = FlappyBird()
p = PLE(game, fps=30, display_screen=True, force_fps=False,
        state_preprocessor=process_state)
p.init()
game.ple = p

# number of the experiment we launch; this way we avoid overwriting files
# that were created with other params
number_experiment = randint(0, 10000000)

# agent
action_set = p.getActionSet()
agent = QLearnerEvolverFlappy(len(action_set), p.getGameStateDims()[1])
agent.should_epsilon_decay = False  # to control the decay differently
# agent.load("flappy1_100.h5")

nb_games = 1    # game counter
nb_frames = 0   # frame counter
score_game = 0  # score of the current game

# to average the last losses and scores
last_losses = deque(maxlen=1000)
last_500_games_score = deque(maxlen=500)

# flags so we write to the files only once every x games
flag_game_10 = False
flag_game_100 = False
flag_game_500 = False

# FlappyBird driven by a PolicyNetwork agent.
# (PolicyNetwork is defined elsewhere in the project.)
from collections import deque

import numpy as np
from ple import PLE
from ple.games.flappybird import FlappyBird

def process_state(state):
    # flatten PLE's state dict into a (1, n_features) array
    return np.array([list(state.values())])

game = FlappyBird()
p = PLE(game, fps=30, display_screen=True, force_fps=False,
        state_preprocessor=process_state)
p.init()
game.ple = p

# agent
action_set = p.getActionSet()
agent = PolicyNetwork(len(action_set), p.getGameStateDims()[1])

# some flags and variables
nb_games = 1
nb_frames = 0
last_losses = deque(maxlen=1000)
flag_game_10 = False
flag_game_100 = False
flag_game_500 = False
score_game = 0
last_500_games_score = deque(maxlen=500)

# variables linked to the epsilon decrease
EXPLORE = 300000  # small is 300000, big is 5000000
FINAL_EPSILON = 0.0001
INITIAL_EPSILON = 0.8
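
# A minimal sketch (not from the source) of what the preprocessor above
# produces: PLE's FlappyBird state is a dict of scalars, and process_state
# flattens it into a (1, n_features) batch row for the network.
example_state = {"player_y": 256.0, "player_vel": 0.0,
                 "next_pipe_dist_to_player": 140.0}  # illustrative subset of keys
print(process_state(example_state).shape)  # -> (1, 3)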

# Approximate Q-learning on a Flappy Bird clone with a linearly annealed epsilon.
# (flappy, preprocessor, features and ApproxQAgent are defined elsewhere in the project.)
from ple import PLE

STEPS_PER_EPOCHS = 1000
EPOCHS = 60
EPSILON_START = 0.01
EPSILON_DECAY = EPOCHS * STEPS_PER_EPOCHS
EPSILON_MIN = 0.0
EPSILON_DECAY_V = (EPSILON_MIN - EPSILON_START) / EPSILON_DECAY  # negative per-step decrement

game = flappy.FlappyClone()
env = PLE(game, display_screen=True, force_fps=True, fps=30,
          state_preprocessor=preprocessor)
env.init()

approxQAgent = ApproxQAgent(env.getActionSet(), env.getGameStateDims(),
                            features, learningRate=.002)
reward = 0.
epsilon = EPSILON_START

for e in range(EPOCHS):
    avgloss = 0.
    avgreward = 0.
    for s in range(STEPS_PER_EPOCHS):
        if env.game_over():  # if the game is over, reset
            # print("tick {} death at score: {}".format(e * STEPS_PER_EPOCHS + s, game.getScore()))
            env.reset_game()
        obs = env.getGameState()
        action = approxQAgent.getAction(obs, epsilon)
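
# Hedged sketch (assumption, not from the source): how such a linear anneal
# behaves with the constants above; epsilon reaches EPSILON_MIN after exactly
# EPOCHS * STEPS_PER_EPOCHS frames.
eps = EPSILON_START
for _ in range(EPSILON_DECAY):
    eps = max(EPSILON_MIN, eps + EPSILON_DECAY_V)  # EPSILON_DECAY_V is negative
print(eps)  # -> 0.0 (up to float rounding)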

# FlappyBird driven by a RandomSearch agent.
# (process_state and RandomSearch are defined elsewhere in the project.)
from collections import deque

from ple import PLE
from ple.games.flappybird import FlappyBird

game = FlappyBird()
p = PLE(game, fps=30, display_screen=True, force_fps=False,
        state_preprocessor=process_state)
p.init()
game.ple = p
# print(p.getActionSet())

# agent
action_set = p.getActionSet()
agent = RandomSearch(len(action_set), p.getGameStateDims()[1])
# agent.load("flappy1_100.h5")

nb_games = 1
nb_frames = 0
flag_game_10 = False
flag_game_100 = False
flag_game_50 = False
score_game = 0
last_50_games_score = deque(maxlen=50)

EXPLORE = 5000000  # small is 300000, big is 5000000
FINAL_EPSILON = 0.0001
INITIAL_EPSILON = 0.1

import numpy as np

from ple import PLE
from deer.base_classes import Environment  # assumed: DeeR base class, given the interface below


class MyEnv(Environment):
    VALIDATION_MODE = 0
    # original size is 288x512 so dividing

    def __init__(self, rng, game=None, frame_skip=4,
                 ple_options={"display_screen": True, "force_fps": True, "fps": 30}):
        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0
        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
        self._hist_size = 1

        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        self._actions = self._ple.getActionSet()
        self._state_size = self._ple.getGameStateDims()[0]
        self._state_saved = np.zeros(self._state_size, dtype=np.float32)
        self.previous_score = 0.0
        self.episode_scores = []

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self.episode_scores = []
                self.previous_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
                self.episode_scores.append(self._mode_score - self.previous_score)
                self.previous_score = self._mode_score
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        # print("Dead at score {}".format(self._ple.game.getScore()))
        self._ple.reset_game()
        for _ in range(self._random_state.randint(self._hist_size)):
            self._ple.act(self._ple.NOOP)

        return [[[0] * self._state_size] * self._hist_size]

    def act(self, action):
        action = self._actions[action]
        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break

        self._state_saved = self._ple.getGameState()
        self._mode_score += reward
        return reward  # np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._mode_episode_count += 1
        maxscore = max(self.episode_scores) if len(self.episode_scores) else "N/A"
        print("== Max score of episode is {} over {} episodes ==".format(
            maxscore, self._mode_episode_count))

    def inputDimensions(self):
        return [(self._hist_size, self._state_size)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._state_saved)]

    def inTerminalState(self):
        return self._ple.game_over()
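
# Hedged usage sketch (assumption, not from the source): drive MyEnv directly
# with random actions; mode -1 is the training mode in this interface, and a
# state preprocessor is supplied so getGameState() returns an array.
from ple.games.flappybird import FlappyBird

def flat_state(state):
    # dict -> 1-D float array
    return np.array(list(state.values()), dtype=np.float32)

rng = np.random.RandomState(0)
env = MyEnv(rng, game=FlappyBird(),
            ple_options={"display_screen": False, "force_fps": True,
                         "fps": 30, "state_preprocessor": flat_state})
env.reset(mode=-1)
while not env.inTerminalState():
    env.act(rng.randint(env.nActions()))
print(env.observe()[0].shape)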

import numpy as np
import pygame

from ple import PLE
from ple.games.catcher import Catcher
from ple.games.flappybird import FlappyBird
from ple.games.monsterkong import MonsterKong
from ple.games.pixelcopter import Pixelcopter
from ple.games.pong import Pong
from ple.games.puckworld import PuckWorld
from ple.games.raycastmaze import RaycastMaze
from ple.games.snake import Snake
from ple.games.waterworld import WaterWorld
from tensorforce import TensorForceError
from tensorforce.environments import Environment


class PygameLearningEnvironment(Environment):

    def __init__(self, game_name, rewards, state_as_image=True, fps=30, force_fps=True,
                 frame_skip=2, hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize the Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            game_name: name of the PLE environment
            rewards: dict of reward values passed to PLE
            state_as_image: return the screen pixels (True) or the key/value game state (False)
            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            visualize: if True, render the game during training (slows training down)
            width, height: width and height of the environment
            lives: number of lives in the game; the game resets on game over (i.e. lives == 0).
                Only used by Catcher and Pong (as the max score).
        """
        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps                    # frames per second
        self.force_fps = force_fps        # False for slower speeds
        self.frame_skip = frame_skip      # frames to skip
        self.ple_num_steps = hold_action  # frames to continue an action for
        # self.isRGB = isRGB  # always returns color; let tensorforce do the processing
        self.visualize = visualize
        self.width = width
        self.height = height

        # testing counters
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height, init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height, MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # FlappyBird limits its height and width
        else:
            raise TensorForceError('Unknown Game Environment.')

        if self.state_as_image:
            process_state = None
        else:
            # create a preprocessor that reads the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(), dtype=float, count=len(state))
                return np.array(list(state.values()), dtype=np.float32)

        # make a PLE instance
        self.env = PLE(self.game, reward_values=self.rewards, fps=self.fps,
                       frame_skip=self.frame_skip, num_steps=self.ple_num_steps,
                       force_fps=self.force_fps, display_screen=self.visualize,
                       state_preprocessor=process_state)
        # self.env.init()
        # self.env.act(self.env.NOOP)  # game starts on a black screen
        # self.env.reset_game()

        # set up the gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)

        # set up the action converter:
        # PLE returns legal action indexes; convert these to just numbers
        self.action_list = self.env.getActionSet()
        # sort with None (NOOP) last so the index -> action mapping is stable
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))

    def __str__(self):
        return 'PygameLearningEnvironment({})'.format(self.env_name)

    def close(self):
        pygame.quit()
        self.env = None

    def reset(self):
        # env.act(env.NOOP)  # need to take an action or the screen is black
        # clear the gamescreen
        if self.state_as_image:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.float32)
        self.env.reset_game()
        return self.current_state

    def execute(self, actions):
        # testing hooks (disabled): lives checks, screenshots, and per-episode
        # reward/timestep counters were logged here during debugging
        ple_actions = self.action_list[actions]
        reward = self.env.act(ple_actions)
        state = self.current_state

        if self.env.game_over():
            terminal = True
            self.reached_terminal += 1
        else:
            terminal = False

        return state, terminal, reward

    @property
    def actions(self):
        return dict(type='int', num_actions=len(self.action_list), names=self.action_list)

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def current_state(self):
        # the returned state is either an image or an np array of the key state components
        if self.state_as_image:
            self.gamescreen = self.env.getScreenRGB()
        else:
            self.gamescreen = self.env.getGameState()
        return np.copy(self.gamescreen)
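
# Hedged usage sketch (assumption, not from the source): a random rollout
# through the TensorForce-style execute() interface. The rewards dict keys
# follow PLE's reward_values convention; note that PLE's init() is commented
# out in the constructor above, so we call it explicitly here.
rewards = {"positive": 1.0, "negative": -1.0, "tick": 0.0, "loss": -5.0, "win": 5.0}
env = PygameLearningEnvironment('catcher', rewards)  # state_as_image=True by default
env.env.init()
state = env.reset()
terminal = False
while not terminal:
    action = np.random.randint(len(env.action_list))  # uniform random action index
    state, terminal, reward = env.execute(action)
env.close()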

# A gym wrapper around PLE games.
# (process_state and process_state_prespecified are defined elsewhere in the project.)
import gym
import numpy as np
from gym import spaces

from ple import PLE


class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, prespecified_game=True, game_name='MyCatcher',
                 display_screen=True, rgb_state=False):
        # open up a game state to communicate with the emulator
        import importlib
        if prespecified_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = ('domains.ple.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        self.game = getattr(game_module, game_name)()
        self.rgb_state = rgb_state
        if self.rgb_state:
            self.game_state = PLE(self.game, fps=30, display_screen=display_screen)
        else:
            if prespecified_game:
                self.game_state = PLE(self.game, fps=30, display_screen=display_screen,
                                      state_preprocessor=process_state_prespecified)
            else:
                self.game_state = PLE(self.game, fps=30, display_screen=display_screen,
                                      state_preprocessor=process_state)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        if self.rgb_state:
            self.state_width, self.state_height = self.game_state.getScreenDims()
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(self.state_width, self.state_height, 3))
        else:
            self.state_dim = self.game_state.getGameStateDims()
            self.observation_space = spaces.Box(low=0, high=255, shape=self.state_dim)
        self.viewer = None
        self.feature_bins = []
        if hasattr(self.game, 'feature_bins'):
            self.feature_bins = self.game.feature_bins

    def get_source_state(self, state):
        if hasattr(self.game, 'get_source_state'):
            return self.game.get_source_state(state)
        return None

    def get_uniform_state_weights(self):
        if hasattr(self.game, 'get_uniform_state_weights'):
            return self.game.get_uniform_state_weights()
        else:
            states = self.get_states()
            weights = np.ones(len(states))
            weights = [float(i) / sum(weights) for i in weights]
            return states, weights

    def generate_training_subset(self, percent_sim_data):
        if hasattr(self.game, 'generate_training_subset'):
            return self.game.generate_training_subset(percent_sim_data)

    def set_to_training_set(self):
        if hasattr(self.game, 'set_to_training_set'):
            return self.game.set_to_training_set()

    def set_to_testing_set(self):
        if hasattr(self.game, 'set_to_testing_set'):
            return self.game.set_to_testing_set()

    def get_states(self):
        if hasattr(self.game, 'states'):
            return self.game.states

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_state()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _get_image(self, game_state):
        # hack to fix the rotated image returned by ple
        return np.fliplr(np.rot90(game_state.getScreenRGB(), 3))

    def _get_state(self):
        if self.rgb_state:
            return self._get_image(self.game_state)
        else:
            return self.game_state.getGameState()

    @property
    def _n_actions(self):
        return len(self._action_set)

    def _reset(self):
        # return: (states, observations)
        if self.rgb_state:
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(self.state_width, self.state_height, 3))
        else:
            self.observation_space = spaces.Box(low=0, high=255, shape=self.state_dim)
        self.game_state.reset_game()
        return self._get_state()

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image(self.game_state)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
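
# Hedged usage sketch (assumption, not from the source): exercising the
# wrapper through gym's old underscore API with PLE's built-in Catcher;
# rgb_state=True avoids the project-local state preprocessors.
env = PLEEnv(prespecified_game=True, game_name='Catcher',
             display_screen=False, rgb_state=True)
env._seed(42)
obs = env._reset()
done = False
while not done:
    obs, reward, done, info = env._step(env.action_space.sample())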

# Excerpt: debug loop for a shooting agent (assumes p, max_noops, nb_frames
# and ShootAgent are set up earlier in the file).
import numpy as np
from PIL import Image

# shooting agent
agent = ShootAgent(p.getActionSet())

# init agent and game
p.init()

# let's do a random number of NOOPs
for i in range(np.random.randint(0, max_noops)):
    reward = p.act(p.NOOP)

# start our training loop
for f in range(nb_frames):
    # if the game is over
    if p.game_over():
        p.reset_game()
        print('game over')

        (screen_width, screen_height) = p.getScreenDims()
        print(screen_width, screen_height)
        print(p.getGameStateDims())

        obs = p.getScreenRGB()
        img = Image.fromarray(obs)
        img.show()
        # state = p.getGameState()
        break
        # print(state)

    # action = agent.pickAction(reward, obs)
    # reward = p.act(action)
    # print('score: {}'.format(reward))

# Joust: two DQN agents playing against each other.
# (Joust and DQNAgent are defined elsewhere in the project.)
import logging

import numpy as np
from ple import PLE

def process_state(state):
    # feature extraction; the body of this function is truncated in the source
    return np.array([
        # ...
    ]).reshape(1, -1)

game = Joust(display_screen=True)
p = PLE(game, fps=30, display_screen=False,
        state_preprocessor=process_state, force_fps=False)
p.init()

player1 = game.player1
player2 = game.player2
agent1 = DQNAgent(player1, game.p1_actions, p.getGameStateDims(), log_level=logging.INFO)
agent2 = DQNAgent(player2, game.p2_actions, p.getGameStateDims(), log_level=logging.INFO)

game.adjustRewards({
    "positive": 0.1,
    "tick": 0.001,
    "negative": -0.1,
    "win": 1,
    "loss": -1,
})

nb_frames = 500
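
# Hedged sketch (assumption, not from the source): adjustRewards() above
# overrides PLE's per-event reward values, so even idling collects the 0.001
# "tick" reward on every frame; p.NOOP is PLE's built-in no-op action.
total = 0.0
for _ in range(nb_frames):
    if p.game_over():
        p.reset_game()
    total += p.act(p.NOOP)
print("accumulated reward over {} frames: {}".format(nb_frames, total))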

        # tail of the expert policy (the start of this method is truncated in
        # the source): flap when the player has sunk below the target height
        # derived from the next bottom pipe
        targetH = selectedPipeBotY - DELTA_H
        if (state[player_y] > targetH):
            action = flap
        return self.possibleActions[action]


def preprocessor(state):
    # sort the keys so the feature order is deterministic
    return np.array([state[k] for k in sorted(state.keys())])


game = flappy.FlappyClone(crazy=False)
env = PLE(game, display_screen=True, force_fps=True, fps=30,
          state_preprocessor=preprocessor)
env.init()

expertAgent = ExpertAgent(env.getActionSet(), env.getGameStateDims())

for e in range(1, 101):
    while True:
        if env.game_over():  # if the game is over, reset
            print("test {}, death at score: {}".format(e, game.getScore()))
            env.game.tick(1. / 2.)
            env.reset_game()
            break
        reward = env.act(expertAgent.getAction(env.getGameState()))
        print("score={:010.1f}".format(game.getScore()), end="\r")
        env.game.tick(FPS)
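
# Hedged sketch (assumption, not from the source) of the full heuristic that
# the method tail above implies: aim DELTA_H pixels above the bottom of the
# next pipe gap and flap whenever the bird is below that target (y grows
# downward in PLE). The names below are illustrative, not the project's.
FLAP, NOOP = 0, 1  # indexes into a two-entry possibleActions list
DELTA_H = 15       # assumed safety margin in pixels

def expert_policy(player_y, pipe_bottom_y):
    targetH = pipe_bottom_y - DELTA_H
    return FLAP if player_y > targetH else NOOP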