Example 1
def main():
    render_bool = True
    if not render_bool:
        os.environ["SDL_VIDEODRIVER"] = "dummy"
    # else:
    #     pygame.display.set_mode((800, 600 + 60))
    # create the environment
    game = GameEnv()
    p = PLE(game, display_screen=render_bool, fps=60, force_fps=False
            )  # , fps=30, display_screen=render_bool, force_fps=True)

    p.init()

    # build the agent with the PARL framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())
    width, height = p.getScreenDims()
    rpm = ReplayMemory(MEMORY_SIZE)  # DQN experience replay buffer
    obs_dim = 1, width, height
    model = Model(act_dim=act_dim)
    alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(alg,
                  obs_dim=obs_dim,
                  act_dim=act_dim,
                  e_greed=0.5,
                  e_greed_decrement=0.00001)  # e_greed: probability of taking a random action, for exploration

    # load a previously saved model, if any
    best_eval_reward = -1000

    if os.path.exists('./model_dqn.ckpt'):
        print("loaded model:", './model_dqn.ckpt')
        agent.restore('./model_dqn.ckpt')
        best_eval_reward = evaluate(p, agent, render=render_bool)
        # run_episode(env, agent, train_or_test='test', render=True)
        # exit()
    # pre-fill the replay buffer so early training has enough diverse samples
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(p, agent, rpm)

    max_episode = 200000
    # start training
    episode = 0

    while episode < max_episode:  # train for max_episode episodes; test episodes are not counted
        # train part
        for i in range(0, 5):
            total_reward = run_episode(p, agent, rpm)
            episode += 1
        # test part
        eval_reward = evaluate(p, agent,
                               render=render_bool)  # render=True to watch the evaluation
        logger.info('episode:{}    e_greed:{}   test_reward:{}'.format(
            episode, agent.e_greed, eval_reward))

        # save the model to ./model_dqn_<rate_num>.ckpt (rate_num is defined elsewhere in the original script)
        agent.save('./model_dqn_%d.ckpt' % rate_num)
        if best_eval_reward < eval_reward:
            best_eval_reward = eval_reward
            agent.save('./model_dqn.ckpt')
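The training loop above relies on helpers (run_episode, evaluate, ReplayMemory, Model, Agent) and constants (MEMORY_SIZE, MEMORY_WARMUP_SIZE, GAMMA, LEARNING_RATE) defined elsewhere in the script. A minimal sketch of what run_episode could look like for this PLE + DQN setup, assuming the agent exposes sample()/learn() and the replay buffer exposes append()/sample_batch() (assumptions, not the original code):

import numpy as np

LEARN_FREQ = 5      # assumed: learn every 5 environment steps
BATCH_SIZE = 32     # assumed batch size

def run_episode(p, agent, rpm):
    """One hypothetical training episode against a PLE instance p."""
    total_reward = 0
    step = 0
    p.reset_game()
    obs = np.expand_dims(p.getScreenGrayscale(), axis=0)   # add a leading channel axis
    while not p.game_over():
        step += 1
        act_idx = agent.sample(obs)                         # epsilon-greedy action index (assumed API)
        reward = p.act(p.getActionSet()[act_idx])           # map index to a PLE key code
        next_obs = np.expand_dims(p.getScreenGrayscale(), axis=0)
        done = p.game_over()
        rpm.append((obs, act_idx, reward, next_obs, done))
        if len(rpm) > MEMORY_WARMUP_SIZE and step % LEARN_FREQ == 0:
            batch_obs, batch_act, batch_reward, batch_next_obs, batch_done = \
                rpm.sample_batch(BATCH_SIZE)                # assumed replay-buffer API
            agent.learn(batch_obs, batch_act, batch_reward, batch_next_obs, batch_done)
        total_reward += reward
        obs = next_obs
    return total_reward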
Example 2
class MyEnv(Environment):
    VALIDATION_MODE = 0
    memSize = 4
    # original size is 288x512 so dividing
    dividing_factor = 8
    width = 288 // dividing_factor
    height = 512 // dividing_factor

    def __init__(self,
                 rng,
                 game=None,
                 frame_skip=4,
                 ple_options={
                     "display_screen": True,
                     "force_fps": True,
                     "fps": 30
                 }):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng

        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((w, h), dtype=np.uint8)
        self._reduced_screen = np.empty((self.width, self.height),
                                        dtype=np.uint8)
        self._actions = self._ple.getActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        print("Dead at score {}".format(self._ple.game.getScore()))
        self._ple.reset_game()
        # for _ in range(self._random_state.randint(15)):
        # self._ple.act(self._ple.NOOP)
        # self._screen = self._ple.getScreenGrayscale()
        # cv2.resize(self._screen, (48, 48),
        # self._reduced_screen,
        # interpolation=cv2.INTER_NEAREST)

        return [self.memSize * [self.width * [self.height * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break

        self._screen = self._ple.getScreenGrayscale()
        self._reduced_screen = cv2.resize(self._screen,
                                          (self.height, self.width),
                                          interpolation=cv2.INTER_NEAREST)
        cv2.imshow("debug", self._reduced_screen.T)
        cv2.waitKey(1)
        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._mode_episode_count += 1
        mean = (self._mode_score / self._mode_episode_count
                if self._mode_episode_count else "N/A")
        print("== Mean score per episode is {} over {} episodes ==".format(
            mean, self._mode_episode_count))

    def inputDimensions(self):
        return [(self.memSize, self.width, self.height)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reduced_screen) / 256.]

    def inTerminalState(self):
        return self._ple.game_over()
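A hypothetical way to exercise the wrapper above with a concrete PLE game; FlappyBird and the random-action loop below are illustrative assumptions, not part of the original class:

import numpy as np
from ple.games.flappybird import FlappyBird

rng = np.random.RandomState(123456)
env = MyEnv(rng,
            game=FlappyBird(),
            ple_options={"display_screen": False, "force_fps": True, "fps": 30})

env.reset(mode=-1)                   # -1 marks the training mode in this wrapper
for _ in range(10):
    a = rng.randint(env.nActions())  # random action index
    r = env.act(a)                   # env.act returns np.sign(reward)
    if env.inTerminalState():
        break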
Example 3
class PLEWaterWorldEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game_name='WaterWorld',
                 display_screen=True,
                 ple_game=True,
                 obs_type="Image",
                 reward_type=1):
        '''
        For WaterWorld:
            getGameState() returns [player x position, player y position, player x velocity, player y velocity, player distance to each creep]
            player distance to each creep is a dict with "GOOD" : [], "BAD" : []
        @Params:
            obs_type :
                "RAM" : getGameState()
                "Image" : (48, 48, 3)
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of dis = sum(distance_from_good) - sum(distance_from_bad)
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)

        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game,
                              fps=30,
                              display_screen=display_screen,
                              state_preprocessor=self.process_state)
        ##################################################################

        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None

        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type

        # every reward type's max-abs value
        self.rewards_ths = [10.0, 5.0]

        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.img_shape,
                                                dtype=np.uint8)
        else:
            print("Water world only supports image observation!")
            sys.exit(0)
        ############################################

    #############################################
    # Add state processer
    def process_state(self, state):
        return np.array([state.values()])

    #############################################

    def _step(self, a, gamma=0.99):
        #############################################
        # old observation
        old_ram = self.game_state.getGameState()
        #############################################

        reward = self.game_state.act(self._action_set[a])
        state = self._get_image()
        terminal = self.game_state.game_over()

        #############################################
        # new observation
        ram = self.game_state.getGameState()
        #############################################

        #############################################
        # reward 2
        if self.reward_type == 2:
            reward = self.get_reward(old_ram, ram, terminal, 2, gamma)

        # reward 0
        if self.reward_type == 0:
            reward1 = reward
            reward2 = self.get_reward(old_ram, ram, terminal, 2, gamma)
            reward = np.array([reward1, reward2])
        ##############################################

        ############################################################
        # reward scaling
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        ############################################################

        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self, old_ram, ram, done, reward_type, gamma=0.99):
        ''' 
        @Params:
            old_ram, ram : numpy.array, [dict_values([x, y, z, w, {"GOOD" : [], "BAD" : []}])]
            reward_type : 2 , change of distance from good - bad
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])

        reward = 0.0
        if not done:
            if reward_type == 2:
                old_goods = np.array(old_ram[4]["GOOD"])
                old_bads = np.array(old_ram[4]["BAD"])
                goods = np.array(ram[4]["GOOD"])
                bads = np.array(ram[4]["BAD"])

                mean_old_goods = np.mean(
                    old_goods) if len(old_goods) > 0 else 0.0
                mean_old_bads = np.mean(old_bads) if len(old_bads) > 0 else 0.0
                mean_goods = np.mean(goods) if len(goods) > 0 else 0.0
                mean_bads = np.mean(bads) if len(bads) > 0 else 0.0

                old_sum_dis = mean_old_goods - mean_old_bads
                sum_dis = mean_goods - mean_bads
                reward = old_sum_dis - gamma * sum_dis

                if reward > 5.0:
                    reward = 5.0
                elif reward < -5.0:
                    reward = -5.0
        return reward

    #############################################
    #############################################

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        ##########################################
        # resize image
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.ANTIALIAS)
        image_resized = np.array(img).astype(np.uint8)
        ##########################################
        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
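A hypothetical rollout with the wrapper above; this snippet uses the old gym API in which _reset/_step/_render are the underlying methods, so they are called directly here (an assumption about how the class is meant to be driven):

env = PLEWaterWorldEnv(display_screen=False, obs_type="Image", reward_type=1)
obs = env._reset()
done, total = False, 0.0
while not done:
    a = env.action_space.sample()            # random discrete action
    obs, reward, done, info = env._step(a)   # reward_type=1: raw reward scaled by rewards_ths[0]
    total += reward
print("episode return:", total)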
Example 4
class CustomGameMetaEnv(gym.Env):
    """
    Basically runs the same configuration for 2 episodes and then terminates
    """
    def __init__(self, task={}):
        self._task = task
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        import importlib
        game_module = importlib.import_module('ple.games.customgame')
        game = getattr(game_module, 'customgame')()

        self.game_state = PLE(game, fps=30, display_screen=False)
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3))

        self.num_actions = len(self._action_set)
        self.viewer = None

        # env tracking variables
        self.done_counter = 0
        self.curr_task = None
        self.t = 0
        self.reward_mult = 1.0

    def seed(self, seed=None):
        if not seed:
            seed = np.random.randint(2**31-1)
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
        return [seed]

    def reset_task(self, task):
        pass

    def render(self, mode='human'):
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def reset(self):
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        self.curr_task = self.game_state.game.get_task()
        self.t = 0
        self.game_state.game.set_task(self.curr_task)
        return state
        
    def _get_image(self):
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(),3)) # Hack to fix the rotated image returned by ple
        return image_rotated

    def step(self, action):
        reward = self.game_state.act(self._action_set[action]) * self.reward_mult
        state = self._get_image()
        terminal = self.game_state.game_over()
        self.t += 1
        prev_done_counter = int(self.done_counter)
        if terminal or self.t == 400:
            self.done_counter += 1
            if self.done_counter == 2:
                self.done_counter = 0
                self.game_state.game.set_task(None)
                terminal = True
            else:
                state = self.reset()
                terminal = False
        return state, reward, terminal, {'done': float(abs(self.done_counter - prev_done_counter))}
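A hypothetical rollout showing the two-inner-episode structure above: info['done'] is 1.0 at each inner-episode boundary, while the gym terminal flag only becomes True after the second inner episode ends:

env = CustomGameMetaEnv()
env.seed(0)                  # seed() also calls game_state.init()
obs = env.reset()
inner_episodes = 0
terminal = False
while not terminal:
    obs, reward, terminal, info = env.step(env.action_space.sample())
    inner_episodes += int(info['done'])
print("inner episodes finished:", inner_episodes)   # expected: 2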
Example 5
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, game=None, frame_skip=4, 
            ple_options={"display_screen": True, "force_fps":True, "fps":30}):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
       
        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ple.reset_game()
        for _ in range(self._random_state.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [48 * [48 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break
            
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._mode_episode_count += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._mode_score / self._mode_episode_count, self._mode_episode_count))


    def inputDimensions(self):
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ple.game_over()
Example 6
class OriginalGameEnv(gym.Env):
    def __init__(self, task={}):
        self._task = task
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        import importlib
        game_module = importlib.import_module('ple.games.originalgame')
        game = getattr(game_module, 'originalGame')()

        self.game_state = PLE(game, fps=30, display_screen=False)
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))

        self.num_actions = len(self._action_set)
        self.viewer = None

    def seed(self, seed=None):
        if not seed:
            seed = np.random.randint(2**31 - 1)
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
        return [seed]

    def reset_task(self, task):
        pass

    def render(self, mode='human'):
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    def step(self, action):
        reward = self.game_state.act(self._action_set[action])
        state = self._get_image()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}
Example 7
class MonsterKongEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, map_config):
        self.map_config = map_config
        self.game = MonsterKong(self.map_config)

        self.fps = 30
        self.frame_skip = 1
        self.num_steps = 1
        self.force_fps = True
        self.display_screen = True
        self.nb_frames = 500
        self.reward = 0.0
        self.episode_end_sleep = 0.2

        if 'fps' in map_config:
            self.fps = map_config['fps']
        if 'frame_skip' in map_config:
            self.frame_skip = map_config['frame_skip']
        if 'force_fps' in map_config:
            self.force_fps = map_config['force_fps']
        if 'display_screen' in map_config:
            self.display_screen = map_config['display_screen']
        if 'episode_length' in map_config:
            self.nb_frames = map_config['episode_length']
        if 'episode_end_sleep' in map_config:
            self.episode_end_sleep = map_config['episode_end_sleep']
        self.current_step = 0

        self._seed()

        self.p = PLE(self.game,
                     fps=self.fps,
                     frame_skip=self.frame_skip,
                     num_steps=self.num_steps,
                     force_fps=self.force_fps,
                     display_screen=self.display_screen,
                     rng=self.rng)

        self.p.init()

        self._action_set = self.p.getActionSet()[1:]
        self.action_space = spaces.Discrete(len(self._action_set))
        (screen_width, screen_height) = self.p.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(screen_height, screen_width,
                                                   3))

    def _seed(self, seed=24):
        self.rng = seed

    def _step(self, action_taken):
        reward = 0.0
        action = self._action_set[action_taken]
        reward += self.p.act(action)
        obs = self.p.getScreenRGB()
        done = self.p.game_over()
        info = {'PLE': self.p}
        self.current_step += 1
        if self.current_step >= self.nb_frames:
            done = True
        return obs, reward, done, info

    def _reset(self):
        self.current_step = 0
        # Noop and reset if done
        start_done = True
        while start_done:
            self.p.reset_game()
            _, _, start_done, _ = self._step(4)
            #self.p.init()
        if self.p.display_screen:
            self._render()
            if self.episode_end_sleep > 0:
                time.sleep(self.episode_end_sleep)
        return self.p.getScreenRGB()

    def _render(self, mode='human', close=False):
        if close:
            return  # TODO: implement close
        original = self.p.display_screen
        self.p.display_screen = True
        self.p._draw_frame()
        self.p.display_screen = original
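A hypothetical map_config for the environment above. Only the keys read in __init__ are shown; whatever map-specific keys MonsterKong(map_config) itself expects are not visible in this snippet:

map_config = {
    'fps': 30,
    'frame_skip': 1,            # read by __init__; num_steps stays fixed at 1
    'force_fps': True,          # run as fast as possible instead of real time
    'display_screen': False,
    'episode_length': 500,      # max frames per episode (self.nb_frames)
    'episode_end_sleep': 0.0,   # pause after an episode when rendering
}
env = MonsterKongEnv(map_config)
obs = env._reset()              # old gym API: _reset/_step are called directly here
obs, reward, done, info = env._step(0)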
Example 8
class PLEFlappyBirdEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game_name='FlappyBird',
                 display_screen=True,
                 ple_game=True,
                 obs_type="Image",
                 reward_type=1):
        '''
        For FlappyBird:
            getGameState() returns [player y position, player velocity,
                                    next pipe distance to player, next pipe top y position,
                                    next pipe bottom y position, next next pipe distance,
                                    next next pipe top y, next next pipe bottom y]
        @Params:
            obs_type :
                "RAM" : getGameState()
                "Image" : (512, 288, 3)
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of y-axis distance from the middle of next top pipe ans bottom pipe
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)

        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game,
                              fps=30,
                              display_screen=display_screen,
                              state_preprocessor=self.process_state)
        ##################################################################

        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None

        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type

        # every reward type's max-abs value
        self.rewards_ths = [5.0, 10.0]

        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.img_shape,
                                                dtype=np.uint8)
        elif self.obs_type == "RAM":
            self.observation_space = spaces.Box(low=-100.0,
                                                high=100.0,
                                                shape=(8, ),
                                                dtype=np.float32)
        ############################################

    #############################################
    # Add state processer
    def process_state(self, state):
        return np.array([state.values()])

    #############################################

    def _step(self, a, gamma=0.99):
        #############################################
        if isinstance(a, np.ndarray):
            a = a[0]
        # old observation
        old_ram = self.game_state.getGameState()
        #############################################

        reward = self.game_state.act(self._action_set[a])

        #############################################
        #state = self._get_image()
        if self.obs_type == "Image":
            state = self._get_image()
        #############################################

        terminal = self.game_state.game_over()

        #############################################
        # new observation
        ram = self.game_state.getGameState()
        #############################################

        #############################################
        pass_pipe = False
        # pass one pipe
        if reward == 1.0:
            pass_pipe = True

        if self.reward_type == 1:
            reward = reward / self.rewards_ths[0]

        # reward 2
        if self.reward_type == 2:
            reward = self.get_reward(reward, old_ram, ram, terminal, 2,
                                     pass_pipe, gamma)

        # reward 0
        if self.reward_type == 0:
            reward1 = reward / self.rewards_ths[0]
            reward2 = self.get_reward(reward, old_ram, ram, terminal, 2,
                                      pass_pipe, gamma)
            reward = np.array([reward1, reward2])
            '''
            if reward1 > 0.0:
                print("Pass one pipe:", reward)
                print("Old ram:", list(old_ram[0]))
                print("Ram:", list(ram[0]))
            '''
        ##############################################

        ############################################################
        # reward scaling
        '''
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        '''
        ############################################################

        ##############################################
        # obs
        if self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        ##############################################

        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self,
                   src_reward,
                   old_ram,
                   ram,
                   done,
                   reward_type,
                   pass_pipe,
                   gamma=0.99):
        ''' 
        @Params:
            old_ram, ram : numpy.array, [dict_values([x1, x2, ..., x8])]
            reward_type : 2 , change of y-axis distance from the middle line of the next top and bottom pipe
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])

        reward = src_reward
        if not (done or pass_pipe):
            if reward_type == 2:
                # distance to middle of two pipes
                old_py, old_top_y, old_bottom_y = old_ram[0], old_ram[
                    3], old_ram[4]
                py, top_y, bottom_y = ram[0], ram[3], ram[4]
                old_dis = abs(old_py - (old_top_y + old_bottom_y) / 2.0)
                dis = abs(py - (top_y + bottom_y) / 2.0)
                reward = (src_reward / self.rewards_ths[0]
                          ) + (old_dis - gamma * dis) / self.rewards_ths[1]
                '''
                # if pipes changed, reward = 0.0
                old_next_pipe_distance = old_ram[2]
                next_pipe_distance = ram[2]
                print(old_ram, ram)
                print(old_next_pipe_distance, next_pipe_distance, old_dis, dis, old_top_y, old_bottom_y, top_y, bottom_y, reward)
                '''
        return reward

    #############################################
    #############################################

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        '''
        try:
            self.cnt += 1
        except Exception:
            self.cnt = 0
        if self.cnt <= 10000:
            img = Image.fromarray(image_rotated)
            img.save("/home/lxcnju/workspace/flappy_bird_images/fb_{}.jpg".format(self.cnt))
        '''

        ##########################################
        # resize image
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.ANTIALIAS)
        image_resized = np.array(img).astype(np.uint8)
        ##########################################

        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()

        #######################################
        if self.obs_type == "Image":
            state = self._get_image()
        elif self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        #######################################

        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
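A small worked example of the reward_type == 2 shaping used above: the shaped term rewards moving the bird toward the vertical midpoint of the next pipe gap. The numbers below are made up purely for illustration:

gamma = 0.99
rewards_ths = [5.0, 10.0]                 # same constants as in __init__
old_py, old_top, old_bottom = 250.0, 180.0, 280.0
py, top, bottom = 245.0, 180.0, 280.0     # bird moved toward the gap centre

old_dis = abs(old_py - (old_top + old_bottom) / 2.0)   # 20.0
dis = abs(py - (top + bottom) / 2.0)                    # 15.0
src_reward = 0.0                                        # no pipe passed this frame
shaped = src_reward / rewards_ths[0] + (old_dis - gamma * dis) / rewards_ths[1]
print(round(shaped, 3))                                 # 0.515 -> positive shaping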
Example 9
class ple_wrapper:
    def __init__(self, game, display_screen=False):
        from ple import PLE
        assert game in [
            'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
            'puckworld', 'raycastmaze', 'snake', 'waterworld'
        ]
        if game == 'catcher':
            from ple.games.catcher import Catcher
            env = Catcher()
        elif game == 'monsterkong':
            from ple.games.monsterkong import MonsterKong
            env = MonsterKong()
        elif game == 'flappybird':
            from ple.games.flappybird import FlappyBird
            env = FlappyBird()
        elif game == 'pixelcopter':
            from ple.games.pixelcopter import Pixelcopter
            env = Pixelcopter()
        elif game == 'pong':
            from ple.games.pong import Pong
            env = Pong()
        elif game == 'puckworld':
            from ple.games.puckworld import PuckWorld
            env = PuckWorld()
        elif game == 'raycastmaze':
            from ple.games.raycastmaze import RaycastMaze
            env = RaycastMaze()
        elif game == 'snake':
            from ple.games.snake import Snake
            env = Snake()
        elif game == 'waterworld':
            from ple.games.waterworld import WaterWorld
            env = WaterWorld()

        self.p = PLE(env, fps=30, display_screen=display_screen)
        self.action_set = self.p.getActionSet()
        self.action_size = len(self.action_set)
        self.screen_dims = self.p.getScreenDims()
        self.p.init()

    def gray_scale(self, frame):
        gray_scale_frame = np.dot(frame, np.array([.299, .587,
                                                   .114])).astype(np.uint8)
        assert gray_scale_frame.shape == frame.shape[:-1]
        return gray_scale_frame

    def get_screen(self):
        return np.transpose(self.gray_scale(self.p.getScreenRGB()))

    def reset(self):
        self.p.reset_game()
        state, _, done = self.step(-1)
        assert done == False
        return state
        #return self.get_screen()

    def step(self, action):
        reward = self.p.act(self.action_set[action])
        state_ = self.get_screen()
        done = self.p.game_over()

        return state_, reward, done
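A hypothetical random-policy loop using the ple_wrapper above (it assumes numpy is imported as np at module level, which the class already relies on):

import random

env = ple_wrapper('catcher', display_screen=False)
state = env.reset()
done, total = False, 0.0
while not done:
    action = random.randrange(env.action_size)   # index into env.action_set
    state, reward, done = env.step(action)
    total += reward
print("episode return:", total)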
Example 10
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.layers.conv import conv_2d, max_pool_2d, highway_conv_2d
from tflearn.layers.normalization import local_response_normalization, batch_normalization
from statistics import mean, median
from collections import Counter
from ple.games.flappybird import FlappyBird
from ple.games.catcher import Catcher
from ple import PLE

game = FlappyBird()
env = PLE(game, fps=30, display_screen=True)  # environment interface to game
env.init()
print(env.getActionSet())  # 97 and 100
# 119 for flappy
print(env.getScreenDims())

LR = 1e-3
#env = gym.make('SpaceInvaders-v0')
#print(env.observation_space)
#print(env.action_space)
#env.reset()
goal_steps = 10000
score_requirement = 30
initial_games = 1000


def initial_population():
    training_data = []
    scores = []
    accepted_scores = []
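The snippet is cut off after the first three lines of initial_population. A hypothetical continuation (not the original code), in the spirit of the random-rollout data-collection pattern the surrounding constants suggest: play initial_games random episodes and keep the frames from episodes whose score reaches score_requirement:

import random

def initial_population():
    training_data = []
    scores = []
    accepted_scores = []
    actions = env.getActionSet()
    for _ in range(initial_games):
        env.reset_game()
        score = 0
        game_memory = []
        for _ in range(goal_steps):
            action = random.choice(actions)          # random PLE action (may be None = noop)
            prev_obs = env.getScreenGrayscale()
            reward = env.act(action)
            game_memory.append([prev_obs, action])
            score += reward
            if env.game_over():
                break
        if score >= score_requirement:
            accepted_scores.append(score)
            training_data.extend(game_memory)
        scores.append(score)
    if accepted_scores:
        print('Average accepted score:', mean(accepted_scores))
    print('Median score:', median(scores))
    return training_data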
Example 11
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}


    def __init__(self, game_name='FlappyBird', display_screen=True):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        #* converts non-visual state representation to numpy array
        def process_state(state):
            return np.array([state.values()])

        self.game_state = PLE(game, fps=30, display_screen=display_screen, state_preprocessor=process_state) #* added state_preprocessor
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        #self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        #self.observation_space = spaces.Box(self.low, self.high)
        self.viewer = None



    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self.game_state.getGameState()
        terminal = self.game_state.game_over()

        '''
        reward system:
        did you die? -1000
        else +1
        '''
        if terminal == True:
            reward = -1000
        else:
            reward = 1


        return state, reward, terminal, {}

    def _get_image(self):
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(),3)) # Hack to fix the rotated image returned by ple
        return image_rotated

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        #self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        self.game_state.reset_game()
        state = self.game_state.getGameState()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)


    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
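A hypothetical rollout with the PLEEnv above (old gym API, _reset/_step called directly). The observation is the preprocessed getGameState() array rather than pixels, and the reward override gives +1 per surviving frame and -1000 on death:

env = PLEEnv(game_name='FlappyBird', display_screen=False)
state = env._reset()
done, frames = False, 0
while not done:
    state, reward, done, _ = env._step(env.action_space.sample())
    frames += 1
print("survived frames:", frames)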
Example 12
class SnakeQNetwork:
    def __init__(self,
                 food_reward=10,
                 dead_reward=-10,
                 alive_reward=2,
                 discount_factor=0.95,
                 batch_size=10,
                 train_epochs=100,
                 history_size=1000,
                 history_sample_size=50):
        self.food_reward = food_reward
        self.dead_reward = dead_reward
        self.alive_reward = alive_reward
        self.discount_factor = discount_factor
        self.batch_size = batch_size
        self.train_epochs = train_epochs
        self.history_size = history_size
        self.history_sample_size = history_sample_size
        self.q_learning_history = QLearningHistory(history_size)
        self.exploration_factor = 0.2
        self.next_move_prediction = None
        self.is_neural_network_initialized = False
        pygame.init()
        self.game = Snake(width=64, height=64)
        self.env = PLE(self.game, display_screen=True)
        self.env.init()
        self.LOG = gym.logger

    def run(self,
            maximum_number_of_iterations=10000,
            learning_rate=0.5,
            training=False):

        for iteration in range(0, maximum_number_of_iterations):

            if not self.is_neural_network_initialized:
                self.__initialize_neural_network()
                self.is_neural_network_initialized = True

            observation = self.env.getScreenGrayscale()
            observation_width = self.env.getScreenDims()[0]
            observation_height = self.env.getScreenDims()[1]
            self.game.init()

            # exit the while loop only if it's GAME OVER
            while True:

                q_values = self.next_move_prediction.predict(
                    x=observation.reshape(
                        1, observation_width * observation_height),
                    batch_size=1)
                best_snake_action = np.argmax(q_values)
                reward = self.__take_snake_action(best_snake_action)
                previous_observation = copy.deepcopy(observation)
                observation = self.env.getScreenGrayscale()
                is_game_over = self.env.game_over()

                self.LOG.info(
                    "Current action reward: {r}. Is game over: {d}".format(
                        r=reward, d=is_game_over))

                if training:
                    reward = self.__get_custom_reward(reward)

                    self.q_learning_history.record_event(
                        state=previous_observation,
                        action=best_snake_action,
                        reward=reward,
                        new_state=observation)

                    last_event = self.q_learning_history.get_last_event()
                    self.LOG.info(
                        "Added event #{n} to history. Action: {a}; Reward: {r}"
                        .format(a=last_event[1],
                                r=reward,
                                n=self.q_learning_history.size))

                    if self.q_learning_history.is_full():
                        history_batch = random.sample(
                            self.q_learning_history.get_events(),
                            self.history_sample_size)
                        self.LOG.info(
                            "Sampling {n} events from history.".format(
                                n=self.history_sample_size))

                        training_batch_data = []
                        training_batch_labels = []

                        for history_event in history_batch:
                            old_state, action, reward, new_state = history_event

                            q_values_before_action = self.next_move_prediction.predict(
                                x=old_state.reshape(
                                    1, observation_width * observation_height),
                                batch_size=1)

                            q_values_after_action = self.next_move_prediction.predict(
                                x=new_state.reshape(
                                    1, observation_width * observation_height),
                                batch_size=1)

                            best_q_value_after_action = np.argmax(
                                q_values_after_action)

                            training_q_values = np.zeros((1, 4))

                            for value_idx in range(
                                    0, len(q_values_before_action)):
                                training_q_values[
                                    value_idx] = q_values_before_action[
                                        value_idx]

                            output_update = learning_rate * (
                                reward + (self.discount_factor *
                                          best_q_value_after_action))

                            training_q_values[0][:] = 0
                            training_q_values[0][action] = output_update

                            training_batch_data.append(
                                old_state.reshape(
                                    observation_width * observation_height, ))
                            training_batch_labels.append(
                                training_q_values.reshape(4, ))

                        training_batch_data = np.array(training_batch_data)
                        training_batch_labels = np.array(training_batch_labels)

                        self.next_move_prediction.fit(
                            x=training_batch_data,
                            y=training_batch_labels,
                            epochs=self.train_epochs,
                            batch_size=self.batch_size)

                if is_game_over:
                    break

            if self.exploration_factor > 0.1:
                self.exploration_factor -= (1.0 / maximum_number_of_iterations)
                self.LOG.info(
                    "Exploration factor updated! New value: {v}".format(
                        v=self.exploration_factor))

    def __initialize_neural_network(self):
        input_layer_size = self.env.getScreenDims(
        )[0] * self.env.getScreenDims()[1]
        hidden_layer_size = 100
        output_layer_size = 4

        input_layer = Dense(kernel_initializer='lecun_uniform',
                            units=hidden_layer_size,
                            input_shape=(input_layer_size, ),
                            activation='sigmoid')

        hidden_layer = Dense(kernel_initializer='lecun_uniform',
                             units=output_layer_size,
                             activation='linear')

        self.next_move_prediction = Sequential()
        self.next_move_prediction.add(input_layer)
        self.next_move_prediction.add(hidden_layer)

        self.next_move_prediction.compile(optimizer='rmsprop',
                                          loss='mean_squared_error')

    def __take_snake_action(self, snake_action):

        random_number = np.random.random_sample()

        if not self.q_learning_history.is_full():
            snake_action = random.choice(self.env.getActionSet())
            self.LOG.info("Snake chose to do a random move - add to qHistory!")
            return self.env.act(snake_action)

        elif random_number < self.exploration_factor:
            snake_action = random.choice(self.env.getActionSet())
            self.LOG.info(
                "Random number is smaller than exploration factor, {r} < {ef}! Snake chose random move!"
                .format(r=random_number, ef=self.exploration_factor))
            return self.env.act(snake_action)

        elif snake_action == 0:
            self.LOG.info("Snake chose to go up")
            return self.env.act(115)

        elif snake_action == 1:
            self.LOG.info("Snake chose to go left")
            return self.env.act(97)

        elif snake_action == 2:
            self.LOG.info("Snake chose to go down")
            return self.env.act(119)

        elif snake_action == 3:
            self.LOG.info("Snake chose to go right")
            return self.env.act(100)

    def __get_custom_reward(self, reward):
        if reward >= 1:
            self.LOG.info(
                "Has eaten food! Reward is {r}".format(r=self.food_reward))
            return self.food_reward
        elif reward >= 0:
            self.LOG.info(
                "Stayed alive! Reward is {r}".format(r=self.alive_reward))
            return self.alive_reward
        else:
            self.LOG.info("Crashed! Reward is {r}".format(r=self.dead_reward))
            return self.dead_reward
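A hypothetical way to drive the trainer above (it assumes the class's undeclared dependencies, such as QLearningHistory, Keras Dense/Sequential, gym, pygame, and ple's Snake, are importable in the module):

# Hypothetical usage, not part of the original snippet.
trainer = SnakeQNetwork(food_reward=10, dead_reward=-10, alive_reward=2)
trainer.run(maximum_number_of_iterations=100, learning_rate=0.5, training=True)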
Example 13
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game_name='FlappyBird',
                 display_screen=True,
                 observe_state=False):
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        self.game_state = PLE(game,
                              fps=30,
                              display_screen=display_screen,
                              state_preprocessor=state_preprocessor)  # state_preprocessor is assumed to be defined elsewhere in the module
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        if self.screen_height + self.screen_width > 500:
            img_scale = 0.25
        else:
            img_scale = 1.0
        self.screen_width = int(self.screen_width * img_scale)
        self.screen_height = int(self.screen_height * img_scale)
        self.observe_state = observe_state
        if self.observe_state:
            # the bounds are typically not infinity
            self.observation_space = spaces.Box(
                low=-float('inf'),
                high=float('inf'),
                shape=self.game_state.state_dim)
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.screen_height,
                                                       self.screen_width, 3))
        self.viewer = None

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        if self.observe_state:
            state = self.game_state.getGameState()
        else:
            state = self._get_image()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _resize_frame(self, frame):
        pil_image = Image.fromarray(frame)
        pil_image = pil_image.resize((self.screen_width, self.screen_height),
                                     Image.ANTIALIAS)
        return np.array(pil_image)

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return self._resize_frame(image_rotated)

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self, **kwargs):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_height,
                                                   self.screen_width, 3))
        self.game_state.reset_game(**kwargs)
        if self.observe_state:
            state = self.game_state.getGameState()
        else:
            state = self._get_image()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
Example 14
class PLECatcherEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='Catcher', display_screen=True, ple_game=True, obs_type="Image", reward_type = 1):
        '''
        For Catcher:
            getGameState() returns [player x position, player velocity, fruits x position, fruits y position]
        @Params:
            obs_type :
                "RAM" : getGameState()
                "Image" : (64, 64, 3)
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of x-axis distance from fruit
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)

        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game, fps=30, display_screen=display_screen, state_preprocessor = self.process_state)
        ##################################################################

        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype = np.uint8)
        self.viewer = None

        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type

        # every reward type's max-abs value
        self.rewards_ths = [1.0, 2.0]

        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low = 0, high = 255, shape = self.img_shape, dtype = np.uint8)
        elif self.obs_type == "RAM":
            self.observation_space = spaces.Box(low = -100.0, high = 100.0, shape = (4, ), dtype = np.float32)
        ############################################


    #############################################
    # Add state processer
    def process_state(self, state):
        return np.array([state.values()])
    #############################################

    def _step(self, a, gamma = 0.99):
        #############################################
        if isinstance(a,np.ndarray):
            a = a[0]
        # old observation
        old_ram = self.game_state.getGameState()
        #############################################

        reward = self.game_state.act(self._action_set[a])

        #############################################
        #state = self._get_image()
        if self.obs_type == "Image":
            state = self._get_image()
        #############################################

        terminal = self.game_state.game_over()

        #############################################
        # new observation
        ram = self.game_state.getGameState()
        #############################################

        #############################################
        if self.reward_type == 1:
            reward = reward / self.rewards_ths[0]

        # reward 2
        if self.reward_type == 2:
            reward = self.get_reward(reward, old_ram, ram, terminal, 2, gamma)

        # reward 0
        if self.reward_type == 0:
            reward1 = reward / self.rewards_ths[0]
            reward2 = self.get_reward(reward, old_ram, ram, terminal, 2, gamma)
            reward = np.array([reward1, reward2])
        ##############################################

        ############################################################
        '''
        # reward scaling
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        '''
        ############################################################

        ##############################################
        # obs
        if self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        ##############################################

        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self, src_reward, old_ram, ram, done, reward_type, gamma):
        '''
        @Params:
            old_ram, ram : np.array wrapping dict_values([player x, player velocity, fruit x, fruit y]),
                           as produced by process_state()
            reward_type : 2 = shaping based on the change in x-axis distance to the fruit
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])
        reward = src_reward
        if not done:
            if reward_type == 2:
                old_px, old_fx = old_ram[0], old_ram[2]
                px, fx = ram[0], ram[2]
                old_dis = abs(old_px - old_fx)
                dis = abs(px - fx)
                reward = old_dis - gamma * dis

                # a new epoch
                old_fy, fy = old_ram[3], ram[3]
                if old_fy > fy:
                    reward = 0.0

                reward = min(reward, 2.0)
                reward = max(reward, -2.0)

                reward = src_reward / self.rewards_ths[0] + reward / self.rewards_ths[1]
        return reward
    #############################################
    #############################################
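    # A worked example of the reward_type = 2 shaping above (numbers are assumed, not
    # taken from the game): with gamma = 0.99, old player/fruit x = (10, 20) and new
    # player/fruit x = (12, 20), old_dis = 10 and dis = 8, so the shaping term is
    # 10 - 0.99 * 8 = 2.08, clipped to 2.0, and the returned reward is
    # src_reward / 1.0 + 2.0 / 2.0. If the fruit's y decreased (old_fy > fy), a new
    # fruit has spawned and the shaping term is zeroed first.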

    def _get_image(self):
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(),3)) # Hack to fix the rotated image returned by ple
        ##########################################
        # resize image
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.LANCZOS)  # ANTIALIAS is a removed alias for LANCZOS in Pillow >= 10
        image_resized = np.array(img).astype(np.uint8)
        ##########################################
        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        # keep observation_space consistent with the obs_type chosen in __init__
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low=0, high=255, shape=self.img_shape, dtype=np.uint8)
        elif self.obs_type == "RAM":
            self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(4,), dtype=np.float32)
        self.game_state.reset_game()
        #######################################
        if self.obs_type == "Image":
            state = self._get_image()
        elif self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        #######################################
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)


    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
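A minimal usage sketch for the wrapper above. The class and import path are not shown in this excerpt, so `CatcherPLEEnv` and `my_ple_envs` below are hypothetical names; the sketch only exercises the obs_type / reward_type behaviour documented in the constructor.

import numpy as np
from my_ple_envs import CatcherPLEEnv  # hypothetical module/class name

env = CatcherPLEEnv(game_name='Catcher', display_screen=False,
                    obs_type="RAM", reward_type=0)
state = env._reset()                       # RAM obs: [player x, player velocity, fruit x, fruit y]
for _ in range(10):
    a = np.random.randint(env._n_actions)
    state, reward, done, _ = env._step(a)  # reward_type=0 -> np.array([scaled raw reward, shaped reward])
    if done:
        state = env._reset()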
Example n. 15
class PLEEnv(gym.Env):
    metadata = {"render.modes": ["human", "rgb_array"]}

    def __init__(self,
                 game_name="FlappyBird",
                 display_screen=True,
                 ple_game=True,
                 **kwargs):
        # set headless mode
        os.environ["SDL_VIDEODRIVER"] = "dummy"

        # open up a game state to communicate with emulator
        import importlib

        if ple_game:
            game_module_name = ("ple.games.%s" % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)(**kwargs)
        self.game_state = PLE(game, fps=30, display_screen=display_screen)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None

    def step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self.get_image()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    @property
    def n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()
        state = self.get_image()
        return state

    def render(self, mode="human", close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self.get_image()
        if mode == "rgb_array":
            return img
        elif mode == "human":
            from gym.envs.classic_control import rendering

            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
def main():
    global render_bool
    render_bool = True
    # parl.connect('localhost:8037')
    if dummy_mode:
        render_bool = False
    if not render_bool:
        os.environ["SDL_VIDEODRIVER"] = "dummy"
    # else:
    #     pygame.display.set_mode((800, 600 + 60))
    # create the environment
    game = GameEnv()
    p = PLE(game, display_screen=render_bool, fps=30, force_fps=True
            )  # , fps=30, display_screen=render_bool, force_fps=True)

    p.init()

    # build the agent with the PARL framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())
    width, height = p.getScreenDims()
    rpm = ReplayMemory(MEMORY_SIZE)  # experience replay memory for DQN
    obs_dim = get_env_obs(p).shape
    model = Model(act_dim=act_dim)
    if MODE == "DDPG":
        alg = RL_Alg(model,
                     gamma=GAMMA,
                     tau=0.001,
                     actor_lr=LEARNING_RATE,
                     critic_lr=LEARNING_RATE)
    if MODE == "DQN":
        alg = RL_Alg(model, gamma=GAMMA, lr=LEARNING_RATE, act_dim=act_dim)
    agent = Agent(alg, obs_dim=obs_dim,
                  act_dim=act_dim)  # e_greed gives some probability of picking a random action, for exploration

    # load a previously saved model
    best_eval_reward = -1000
    cache_fn = './model_pixelcopter_%s.ckpt' % MODE
    if os.path.exists(cache_fn):
        print("loaded model:", cache_fn)
        agent.restore(cache_fn)
        best_eval_reward = evaluate(p, agent, render=render_bool)
        # run_episode(env, agent, train_or_test='test', render=True)
        # exit()
    # pre-fill the replay memory so early training has enough diverse samples
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(p, agent, rpm)

    max_episode = 200000
    # start training
    episode = 0

    while episode < max_episode:  # train for max_episode episodes; evaluation episodes are not counted
        # train part
        for i in range(0, 5):
            total_reward = run_episode(p, agent, rpm)
            episode += 1
        # test part
        eval_reward = evaluate(p, agent,
                               render=render_bool)  # render=True to watch the game
        logger.info('episode:{}    e_greed:{}   test_reward:{}'.format(
            episode, e_greed, eval_reward))

        # save the model checkpoint to disk
        agent.save(cache_fn + "." + str(rate_num))
        if best_eval_reward < eval_reward:
            best_eval_reward = eval_reward
            agent.save(cache_fn)
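main() above depends on run_episode() and evaluate() helpers defined elsewhere in this project. A rough sketch of what evaluate() might look like with the PLE calls used above; get_env_obs() is the observation helper already used in main(), while agent.predict() (a greedy PARL-style action query) is an assumption.

import numpy as np

def evaluate(p, agent, render=False, eval_episodes=5):
    # assumed helper: average undiscounted return over a few greedy episodes
    # (rendering is already controlled by display_screen when the PLE instance was created)
    returns = []
    for _ in range(eval_episodes):
        p.reset_game()
        episode_reward = 0.0
        while not p.game_over():
            obs = get_env_obs(p)                # same observation helper as in main()
            action_idx = agent.predict(obs)     # assumed greedy action index
            episode_reward += p.act(p.getActionSet()[action_idx])
        returns.append(episode_reward)
    return float(np.mean(returns))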
Example n. 17
class AngryBirdEnv(gym.Env):
    def __init__(self, display_screen=True):

        self.game_state = PLE(AngryBird(render=display_screen),
                              fps=30,
                              display_screen=display_screen)
        #self.game_state.init()

        self.display_screen = display_screen
        self._action_set = self.game_state.getActionSet()

        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()

        self.observation_space = spaces.Box(low=0, high=255, \
                                            shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        self.viewer = None

    def step(self, a):

        states = []
        if self._action_set[a] == K_r:

            while len(self.game_state.game.player.plan) > 1:

                _ = self.game_state.act(list(self._action_set)[a])
                state = self._get_image()
                if self.display_screen:
                    self.render()

                states.append(state)

        reward = self.game_state.act(list(self._action_set)[a])
        if self.display_screen:
            self.render()
        # reward is 1 (True) only if the score is non-zero and the player survived, otherwise 0/False
        reward = (self.game_state.game.getScore()
                  and not self.game_state.game.player.died)

        terminal = (self.game_state.game_over()
                    or self.game_state.game.player.died)

        if self._action_set[a] != K_r or len(states) == 0:
            states = self._get_image()
            reward = 0
            #states.append(self._get_image())
            pass
        else:
            states = states[0]  #temporary
            self.reset()

        assert reward in [0, 1], 'Reward is not what it should be: ' + str(reward)

        return states, reward, terminal, {}

    def _get_image(self):
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(), 3))
        return image_rotated

    @property
    def _n_actions(self):
        return len(self._action_set)

    def reset(self):
        '''
        Performs the reset of the gym env
        '''
        #if self.display_screen:
        #   time.sleep(1)
        self.observation_space = spaces.Box(low=0, high=255, \
                                            shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)

        self.game_state.game.reset()

        state = self._get_image()
        if self.display_screen:
            self.render()
        return state

    def render(self, mode='rgb_array', close=False):
        '''
        Performs the rendering for the gym env
        '''
        if self.display_screen:
            if close:
                if self.viewer is not None:
                    self.viewer.close()
                    self.viewer = None
                    return
            img = self._get_image()
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)

    def _seed(self, _):
        self.game_state.init()

    def reset_hard(self):
        try:
            self.close()
            self.viewer.close()
            self.viewer = None
        except Exception:
            pass
        self.__init__()
Example n. 18
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game_name='FlappyBird',
                 display_screen=True,
                 ple_game=True,
                 root_game_name=None,
                 reward_type='sparse',
                 obs_type=None,
                 **kwargs):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        os.environ['SDL_AUDIODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = F"{root_game_name.lower()}.envs"
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)(**kwargs)
        self.ple_wrapper = PLE(game, fps=30, display_screen=display_screen)
        self.ple_wrapper.init()
        game.reward_type = reward_type
        self._action_set = self.ple_wrapper.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.ple_wrapper.getScreenDims()
        # Assume observation space to be (64, 64, 3) due to procgen
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(64, 64, 3),
                                            dtype=np.uint8)
        self.viewer = None
        assert obs_type is not None, obs_type
        self.obs_type = obs_type

        self.reward_range = game.rewards['win']

    def step(self, a):
        reward = self.ple_wrapper.act(self._action_set[a])
        if self.obs_type == 'state':
            state = self.ple_wrapper.game.get_state()
        elif self.obs_type == 'image':
            state = self._get_image()
        terminal = self.ple_wrapper.game_over()
        return state, reward, terminal, {}

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.ple_wrapper.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return cv2.resize(image_rotated, (64, 64),
                          interpolation=cv2.INTER_AREA)

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def reset(self):
        self.ple_wrapper.reset_game()
        if self.obs_type == 'state':
            state = self.ple_wrapper.game.get_state()
        elif self.obs_type == 'image':
            state = self._get_image()
        return state

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def seed(self, seed):
        rng = np.random.RandomState(seed)
        self.ple_wrapper.rng = rng
        self.ple_wrapper.game.rng = self.ple_wrapper.rng

        self.ple_wrapper.init()

    def get_keys_to_action(self):
        return {
            (): 0,
            (32, ): 1,
            (119, ): 2,
            (100, ): 3,
            (97, ): 4,
            (115, ): 5,
            (100, 119): 6,
            (97, 119): 7,
            (100, 115): 8,
            (97, 115): 9,
            (32, 119): 10,
            (32, 100): 11,
            (32, 97): 12,
            (32, 115): 13,
            (32, 100, 119): 14,
            (32, 97, 119): 15,
            (32, 100, 115): 16,
            (32, 97, 115): 17
        }
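The tuples in get_keys_to_action() use raw pygame key codes. For readability, the same table can be written with pygame's named constants (K_SPACE=32, K_w=119, K_d=100, K_a=97, K_s=115); KEYS_TO_ACTION is just an illustrative name.

from pygame.constants import K_SPACE, K_w, K_a, K_s, K_d

KEYS_TO_ACTION = {
    (): 0,
    (K_SPACE,): 1,
    (K_w,): 2,
    (K_d,): 3,
    (K_a,): 4,
    (K_s,): 5,
    (K_d, K_w): 6,
    (K_a, K_w): 7,
    (K_d, K_s): 8,
    (K_a, K_s): 9,
    (K_SPACE, K_w): 10,
    (K_SPACE, K_d): 11,
    (K_SPACE, K_a): 12,
    (K_SPACE, K_s): 13,
    (K_SPACE, K_d, K_w): 14,
    (K_SPACE, K_a, K_w): 15,
    (K_SPACE, K_d, K_s): 16,
    (K_SPACE, K_a, K_s): 17,
}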
# shooting agent
agent = ShootAgent(p.getActionSet())

# init agent and game.
p.init()

# let's do a random number of NOOPs
for i in range(np.random.randint(0, max_noops)):
    reward = p.act(p.NOOP)

# start our training loop
for f in range(nb_frames):
    # if the game is over
    if p.game_over():
        p.reset_game()
        print('game over')

    (screen_width, screen_height) = p.getScreenDims()
    print(screen_width, screen_height)
    print(p.getGameStateDims())
    obs = p.getScreenRGB()
    from PIL import Image
    img = Image.fromarray(obs)
    img.show()
    # state = p.getGameState()
    break  # stop after inspecting the first frame
    # print(state)
    # action = agent.pickAction(reward, obs)
    # reward = p.act(action)
#     print('score: {}'.format(reward))
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name, display_screen=True):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        self.game_state = PLE(game,
                              fps=30,
                              frame_skip=2,
                              display_screen=display_screen)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.viewer = None
        self.count = 0

    def step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_image()
        #import scipy.misc
        #scipy.misc.imsave('outfile'+str(self.count)+'.jpg', state)
        #self.count = self.count+1
        terminal = self.game_state.game_over()
        #print(randomAction)
        #print(a,self._action_set[a])
        return state, reward, terminal, {}

    def _get_image(self):
        #image_rotated = self.game_state.getScreenRGB()
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    @property
    def n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def render(self, mode='human', close=False):
        #print('HERE')
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
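The np.fliplr(np.rot90(..., 3)) idiom used in _get_image() throughout these wrappers is simply a transpose of the first two axes: PLE hands back the screen in pygame's (width, height, 3) layout, and the hack converts it to the (height, width, 3) layout expected by image viewers. A quick self-check:

import numpy as np

a = np.arange(2 * 3 * 3).reshape(2, 3, 3)           # tiny (width=2, height=3, RGB) array
hack = np.fliplr(np.rot90(a, 3))                    # the idiom used in _get_image()
assert np.array_equal(hack, a.transpose(1, 0, 2))   # identical to swapping the first two axes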
Example n. 21
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 prespecified_game=True,
                 game_name='MyCatcher',
                 display_screen=True,
                 rgb_state=False):
        # open up a game state to communicate with emulator
        import importlib
        if prespecified_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = ('domains.ple.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        self.game = getattr(game_module, game_name)()
        self.rgb_state = rgb_state
        if self.rgb_state:
            self.game_state = PLE(self.game,
                                  fps=30,
                                  display_screen=display_screen)
        else:
            if prespecified_game:
                self.game_state = PLE(
                    self.game,
                    fps=30,
                    display_screen=display_screen,
                    state_preprocessor=process_state_prespecified)
            else:
                self.game_state = PLE(self.game,
                                      fps=30,
                                      display_screen=display_screen,
                                      state_preprocessor=process_state)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        if self.rgb_state:
            self.state_width, self.state_height = self.game_state.getScreenDims()
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.state_dim = self.game_state.getGameStateDims()
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.viewer = None
        self.feature_bins = []
        if hasattr(self.game, 'feature_bins'):
            self.feature_bins = self.game.feature_bins

    def get_source_state(self, state):
        if hasattr(self.game, 'get_source_state'):
            return self.game.get_source_state(state)
        return None

    def get_uniform_state_weights(self):
        if hasattr(self.game, 'get_uniform_state_weights'):
            return self.game.get_uniform_state_weights()
        else:
            states = self.get_states()
            weights = np.ones(len(states))
            weights = [float(i) / sum(weights) for i in weights]
            return states, weights

    def generate_training_subset(self, percent_sim_data):
        if hasattr(self.game, 'generate_training_subset'):
            return self.game.generate_training_subset(percent_sim_data)

    def set_to_training_set(self):
        if hasattr(self.game, 'set_to_training_set'):
            return self.game.set_to_training_set()

    def set_to_testing_set(self):
        if hasattr(self.game, 'set_to_testing_set'):
            return self.game.set_to_testing_set()

    def get_states(self):
        if hasattr(self.game, 'states'):
            return self.game.states

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_state()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _get_image(self, game_state):
        image_rotated = np.fliplr(
            np.rot90(game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    def _get_state(self):
        if self.rgb_state:
            return self._get_image(self.game_state)
        else:
            return self.game_state.getGameState()

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        if self.rgb_state:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.game_state.reset_game()
        state = self._get_state()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image(self.game_state)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
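The state_preprocessor callables (process_state and process_state_prespecified) passed to PLE above are defined elsewhere in this project. A plausible minimal version, mirroring the preprocessor in the TensorForce example further below, simply flattens the getGameState() dict into a float vector; the exact field ordering and handling in the original is an assumption.

import numpy as np

def process_state(state):
    # assumed preprocessor: turn PLE's game-state dict into a flat float32 vector
    return np.array(list(state.values()), dtype=np.float32)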
Example n. 22
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self,
                 rng,
                 game=None,
                 frame_skip=4,
                 ple_options={
                     "display_screen": True,
                     "force_fps": True,
                     "fps": 30
                 }):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng

        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reduced_screen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ple.reset_game()
        for _ in range(self._random_state.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48),
                   self._reduced_screen,
                   interpolation=cv2.INTER_NEAREST)

        # initial observation: one input of 4 stacked blank 48x48 frames
        return [4 * [48 * [48 * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break

        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48),
                   self._reduced_screen,
                   interpolation=cv2.INTER_NEAREST)

        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._mode_episode_count += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._mode_score / self._mode_episode_count,
            self._mode_episode_count))

    def inputDimensions(self):
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reduced_screen) / 256.]

    def inTerminalState(self):
        return self._ple.game_over()
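observe() above returns a single 48x48 frame scaled to [0, 1), while inputDimensions() declares (4, 48, 48): the 4-frame history is assembled by the surrounding agent framework, which keeps the most recent observations itself. A purely illustrative sketch of that stacking:

from collections import deque
import numpy as np

# illustrative only: the framework normally maintains this 4-frame history,
# based on the leading 4 in inputDimensions()
history = deque([np.zeros((48, 48), dtype=np.float32)] * 4, maxlen=4)

def stacked_observation(env):
    history.append(env.observe()[0])   # one 48x48 frame in [0, 1)
    return np.stack(history)           # shape (4, 48, 48), matching inputDimensions()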
Example n. 23
class PygameLearningEnvironment(Environment):

    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            env_name: PLE environment

            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            isRGB: get color or greyscale version of statespace #isRGB = False,
            game_height,game_width: height and width of environment
            visualize: If set True, the program will visualize the trainings, will slow down training
            lives: number of lives in game. Game resets on game over (ie lives = 0). only in Catcher and Pong (score)

        """

        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps #30  # frames per second
        self.force_fps = force_fps #True  # False for slower speeds
        self.frame_skip = frame_skip  # frames to skip
        self.ple_num_steps = hold_action  # frames to continue action for
        #self.isRGB = isRGB #always returns color, lets tensorforce due the processing
        self.visualize = visualize
        self.width = width
        self.height = height
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown Game Environment.')

        if self.state_as_image:
            process_state = None
        else:
            #create a preprocessor to read the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(),dtype=float,count=len(state))
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game,reward_values=self.rewards,fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps,force_fps=self.force_fps,display_screen=self.visualize,
                       state_preprocessor = process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()


        # setup gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))



    def __str__(self):
        return 'PygameLearningEnvironment({})'.format(self.env_name)

    def close(self):
        pygame.quit()
        self.env = None

    def reset(self):
        # if isinstance(self.gym, gym.wrappers.Monitor):
        #     self.gym.stats_recorder.done = True
        #env.act(env.NOOP) # need to take an action or screen is black
        # clear gamescreen
        if self.state_as_image:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.float32)
        self.env.reset_game()
        return self.current_state

    def execute(self, actions):

        #print("lives check in ple {}".format(self.env.lives()))
        #self.env.saveScreen("test_screen_capture_before_{}.png".format(self.total_time_steps))
        #lives_check = self.env.lives() #testing code

        ple_actions = self.action_list[actions]
        reward = self.env.act(ple_actions)
        state = self.current_state
        # testing code
        # self.env.saveScreen("test_screen_capture_after_{}.png".format(self.total_time_steps))
        # self.episode_time_steps += 1
        # self.episode_reward += reward
        # self.total_time_steps += 1
        # print("reward is {}".format(reward))
        # #if self.env.lives() != lives_check:
        # #    print('lives are different is game over? {}'.format(self.env.game_over()))
        # print('lives {}, game over {}, old lives {}'.format(self.env.lives(),self.env.game_over(),lives_check))

        if self.env.game_over():
            terminal = True
            # testing code
            self.reached_terminal += 1
            # print("GAME OVER reached terminal {}".format(self.reached_terminal))
            # print("episode time steps {}, episode reward {}".format(self.episode_time_steps,self.episode_reward))
            # self.episode_reward = 0
            # self.episode_time_steps = 0
            # print("total timesteps {}".format(self.total_time_steps))
        else:
            terminal = False

        return state, terminal, reward

    @property
    def actions(self):
        return dict(type='int', num_actions=len(self.action_list), names=self.action_list)

    # @property
    # def actions(self):
    #     return OpenAIGym.action_from_space(space=self.gym.action_space)

    #ALE implementation
    # @property
    # def actions(self):
    #     return dict(type='int', num_actions=len(self.action_inds), names=self.action_names)

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def current_state(self):
        #returned state can either be an image or an np array of key components
        if self.state_as_image:
            self.gamescreen = self.env.getScreenRGB()
            # if isRGB:
            #     self.gamescreen = self.env.getScreenRGB()
            # else:
            #     self.gamescreen = self.env.getScreenGrayscale()
        else:
            self.gamescreen = self.env.getGameState()

        return np.copy(self.gamescreen)

    #ALE implementation
    # @property
    # def states(self):
    #     return dict(shape=self.gamescreen.shape, type=float)

    # @property
    # def current_state(self):
    #     self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
    #     return np.copy(self.gamescreen)

    # @property
    # def is_terminal(self):
    #     if self.loss_of_life_termination and self.life_lost:
    #         return True
    #     else:
    #         return self.ale.game_over()
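A minimal usage sketch for the environment above. The reward dict follows PLE's reward_values keys; since __init__ leaves self.env.init() commented out, the sketch calls it explicitly before the first reset.

import numpy as np

rewards = {"positive": 1.0, "negative": -1.0, "tick": 0.0, "loss": -5.0, "win": 5.0}
env = PygameLearningEnvironment('catcher', rewards, state_as_image=True,
                                visualize=False, width=84, height=84, lives=1)
env.env.init()                              # the PLE instance is created but not initialized in __init__
state = env.reset()
for _ in range(100):
    action = np.random.randint(len(env.action_list))
    state, terminal, reward = env.execute(action)
    if terminal:
        state = env.reset()
env.close()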
Example n. 24
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True):
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        self.game_state = PLE(game, fps=30, display_screen=display_screen)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.viewer = None

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_image()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
Example n. 25

def discounted_rewards(rewards, gamma=0.99):
    res = []
    for r in reversed(rewards):
        cum_reward = res[0] if res else 0
        res.insert(0, gamma * cum_reward + r)

    return res
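
# Worked example: discounted_rewards([1, 0, 1], gamma=0.99) walks the list
# right-to-left, so res grows as [1.0] -> [0.99, 1.0] -> [1.9801, 0.99, 1.0],
# i.e. each entry is the discounted return from that step onward.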


def train(env, agent):
    # REINFORCE-style update: play_episode is assumed to return the per-step
    # action log-probabilities (p) and raw rewards (r) of one episode
    optimizer = torch.optim.Adam(agent.parameters())

    while True:
        agent.zero_grad()
        p, r = play_episode(env, agent)
        r = torch.tensor(discounted_rewards(r), device=agent.device)
        loss = -r * p  # policy-gradient objective: -(discounted return * log-prob)
        loss = loss.mean()
        loss.backward()
        optimizer.step()


if __name__ == '__main__':
    env = PLE(Snake(), fps=30, display_screen=True)
    env.init()
    agent = Agent(env.getScreenDims(), 16, env.getActionSet())

    train(env, agent)
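play_episode() and the Agent class are not shown in this excerpt. Below is a rough sketch of a play_episode() that would be compatible with the training loop above; treating the agent as a torch module that maps a screen observation to action logits is an assumption, as is the grayscale observation handling.

import torch

def play_episode(env, agent, max_steps=1000):
    # assumed helper: roll out one episode and return (action log-probs, rewards)
    env.reset_game()
    log_probs, rewards = [], []
    actions = env.getActionSet()
    for _ in range(max_steps):
        obs = torch.tensor(env.getScreenGrayscale(), dtype=torch.float32,
                           device=agent.device).unsqueeze(0)
        logits = agent(obs).squeeze(0)      # assumption: agent outputs one logit per action
        dist = torch.distributions.Categorical(logits=logits)
        a = dist.sample()
        log_probs.append(dist.log_prob(a))
        rewards.append(env.act(actions[a.item()]))
        if env.game_over():
            break
    return torch.stack(log_probs), rewards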