Example 1
class Environment:
  def __init__(self, rom_file, args):
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    # cv2.resize expects (width, height) order
    self.dims = (args.screen_width, args.screen_height)

  def numActions(self):
    return len(self.actions)

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def isTerminal(self):
    return self.ale.game_over()
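
A minimal driver for the wrapper above, illustrative only: 'args' is a hypothetical argparse-style namespace carrying the fields the constructor reads, and 'breakout.bin' is a placeholder ROM path.

import argparse
import random

# Hypothetical configuration; field names mirror what __init__ reads.
args = argparse.Namespace(
    display_screen=False, frame_skip=4, repeat_action_probability=0.0,
    color_averaging=True, random_seed=123, record_screen_path=None,
    record_sound_filename=None, minimal_action_set=True,
    screen_height=84, screen_width=84)

env = Environment('breakout.bin', args)  # placeholder ROM path
env.restart()
total_reward = 0
while not env.isTerminal():
    # act() takes an index into the wrapper's action set
    total_reward += env.act(random.randrange(env.numActions()))
print('Episode reward: %d' % total_reward)
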
Example 2
class Emulate:
  def __init__(self, rom_file, display_screen=False, frame_skip=4,
               screen_height=84, screen_width=84, repeat_action_probability=0,
               color_averaging=True, random_seed=0,
               record_screen_path='screen_pics', record_sound_filename=None,
               minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width, screen_height)

  def numActions(self):
    return len(self.actions)

  def getActions(self):
    return self.actions

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def getScreenGray(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def getScreenColor(self):
    screen = self.ale.getScreenRGB()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def isTerminal(self):
    return self.ale.game_over()
Example 3
def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
Example 4
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)

        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        #  self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()

        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print('[atari.py] Frames/second: %f' %
                  (self.action_count / (time.time() - self.start_time)))
            print('[atari.py] Overall game frame count: %d' %
                  (self.action_count * self.frame_skip))
            print('---------')

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)

            # Also supports independent audio queries if user desires:
            #  self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)

        return np.reshape(np_data_image,
                          (self.screen_height, self.screen_width,
                           3)), np.asarray(np_data_audio)
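
A short driver for the audio-enabled wrapper above, illustrative only ('pong.bin' is a placeholder ROM path):

atari = Atari('pong.bin')  # placeholder ROM path
for _ in range(1000):
    atari.take_action()
    # image is (screen_height, screen_width, 3) uint8; audio is a 1-D uint8 buffer
    image, audio = atari.get_image_and_audio()
    atari.print_fps()
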
Example 5
def train():
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.loadROM('roms/breakout.bin')
    legal_actions = ale.getLegalActionSet()
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        reward = ale.act(a)
        screen = ale.getScreenRGB()
        print(screen)
        plt.imshow(screen)
        plt.show()

        total_reward += reward
        print(total_reward)
    print('Episode end!')
Example 6
    def _init_ale(self):
        ale = ALEInterface()
        ale.setBool('sound', self.play_sound)
        ale.setBool('display_screen', self.display_screen)
        ale.setInt('random_seed', self.random_seed)

        # Frame skip is implemented separately
        ale.setInt('frame_skip', 1)
        ale.setBool('color_averaging', False)
        ale.setFloat('repeat_action_probability', 0.0)
        # This repeat_action_probability has an unexpected effect on the game:
        # the larger the value, the more frames the game takes to restart,
        # and at 1.0 the game hangs completely.
        # We set the default value of 0.0 here, expecting it to have no
        # effect since frame_skip == 1.
        # Action repetition is the agent's concern, so we do not implement
        # an equivalent in our wrapper.

        if self.record_screen_path:
            _LG.info('Recording screens: %s', self.record_screen_path)
            if not os.path.exists(self.record_screen_path):
                os.makedirs(self.record_screen_path)
            ale.setString('record_screen_dir', self.record_screen_path)

        if self.record_sound_filename:
            _LG.info('Recording sound: %s', self.record_sound_filename)
            record_sound_dir = os.path.dirname(self.record_sound_filename)
            if not os.path.exists(record_sound_dir):
                os.makedirs(record_sound_dir)
            ale.setBool('sound', True)
            ale.setString('record_sound_filename', self.record_sound_filename)

        ale.loadROM(self.rom_path)

        self._ale = ale
        self._actions = (ale.getMinimalActionSet() if self.minimal_action_set
                         else ale.getLegalActionSet())
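
Since the wrapper above pins frame_skip to 1 and leaves action repetition to the agent, the repeat can live at the call site. A minimal sketch, assuming access to the _ale and _actions attributes set above:

def act_with_skip(env, action_index, skip=4):
    # Repeat one action 'skip' times and accumulate the reward,
    # mirroring what ALE's built-in frame_skip would do internally.
    total = 0
    for _ in range(skip):
        total += env._ale.act(env._actions[action_index])
        if env._ale.game_over():
            break
    return total
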
Example 7
class Game():
    """
    Wrapper around the ALEInterface class.
    """

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False) # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()
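
One random episode through the Game wrapper above, illustrative only ('breakout.bin' is a placeholder ROM path). Note that act() here takes a raw ALE action code rather than an index:

import random

game = Game('breakout.bin', sdl=False)
actions = game.get_minimal_action_set()
score = 0
while not game.game_over():
    score += game.act(random.choice(actions))
game.reset_game()
print('Final score: %d' % score)
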
Example 8
def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    recordings_dir = './recordings/breakout/'

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
            ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes
    rewards = []
    for episode in xrange(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a)
            total_reward += reward
        rewards.append(total_reward)
        #print 'Episode', episode, 'ended with score:', total_reward
        ale.reset_game()
    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
Example 10
class AtariGame(Task):
    ''' RL task based on Arcade Game.
    '''

    def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'):
        self.ale = ALEInterface()
        if live:
            USE_SDL = True
            if USE_SDL:
                if sys.platform == 'darwin':
                    import pygame
                    pygame.init()
                    self.ale.setBool('sound', False) # Sound doesn't work on OSX
                elif sys.platform.startswith('linux'):
                    self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.mode = mode
        self.live = live
        self.ale.loadROM(rom_path)
        self.num_frames = num_frames
        self.frames = []
        self.frame_id = 0
        self.cum_reward = 0
        self.skip_frame = skip_frame
        if mode == 'small':
            img = T.matrix('img')
            self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
            self.img_shape = (16, 16)
        else:
            self.img_shape = (84, 84) # image shape according to DQN Nature paper.
        while len(self.frames) < self.num_frames:
            self.step(choice(self.valid_actions, 1)[0])
        self.reset()


    def copy(self):
        import dill as pickle
        return pickle.loads(pickle.dumps(self))


    def reset(self):
        self.ale.reset_game()
        self.frame_id = 0
        self.cum_reward = 0
        if self.skip_frame:
            for frame_i in range(self.skip_frame):
                self.step(choice(self.valid_actions, 1)[0])


    @property
    def _curr_frame(self):
        img = self.ale.getScreenRGB()
        img = rgb2yuv(img)[:, :, 0] # get Y channel, according to Nature paper.
        # print 'RAM', self.ale.getRAM()
        if self.mode == 'small':
            img = self.max_pool(img)
        img = imresize(img, self.img_shape, interp='bicubic')
        return img


    @property
    def curr_state(self):
        '''
        return raw pixels.
        '''
        return np.array(self.frames, dtype=floatX) / floatX(255.) # normalize


    @property
    def state_shape(self):
        return self.curr_state.shape


    @property
    def num_actions(self):
        return len(self.valid_actions)


    @property
    def valid_actions(self):
        return self.ale.getLegalActionSet()


    def step(self, action):
        reward = self.ale.act(action)
        if len(self.frames) == self.num_frames:
            self.frames = self.frames[1:]
        self.frames.append(self._curr_frame)
        self.frame_id += 1
        #print 'frame_id', self.frame_id
        self.cum_reward += reward
        return reward # TODO: scale the gradient up.


    def is_end(self):
        if np.abs(self.cum_reward) > 0:
            return True
        return self.ale.game_over()


    def visualize(self, fig=1, fname=None, format='png'):
        import matplotlib.pyplot as plt
        fig = plt.figure(fig, figsize=(5,5))
        plt.clf()
        plt.axis('off')
        #res = plt.imshow(self.ale.getScreenRGB())
        res = plt.imshow(self._curr_frame, interpolation='none')
        if fname:
            plt.savefig(fname, format=format)
        else:
            plt.show()
        return res
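
A sketch of driving the task above, illustrative only ('breakout.bin' is a placeholder ROM path). The constructor already primes the frame buffer, so curr_state is immediately a stacked array:

task = AtariGame('breakout.bin', num_frames=4)
action = task.valid_actions[0]
reward = task.step(action)
print(task.state_shape)  # (4, 84, 84) in 'normal' mode
if task.is_end():
    task.reset()
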
Example 11
class aleForET:
    def __init__(self, rom_file, screen, rndseed, resume_state_file=None):
        # Pass None for `screen` when you are not interested in running any
        # function that displays graphics; in that case you should only call
        # proceed_one_step__fast__no_scr_support().
        # The other functions use self.screen and would raise a RuntimeError.
        if screen is not None:
            pygame.init()
            self.screen = screen
        GAME_W, GAME_H = 160, 210
        self.size = GAME_W * V.xSCALE, GAME_H * V.ySCALE

        # Get & Set the desired settings
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", rndseed)
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', False)
        self.ale.setBool('color_averaging', COLOR_AVG)
        self.ale.setFloat('repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(rom_file)
        self.gamename = os.path.basename(rom_file).split('.')[0]
        self.clock = pygame.time.Clock()
        self._last_time = time.time()
        self.score = 0
        self.episode = 0
        self.frame_cnt = 0

        # Get the list of legal actions
        self.legal_actions = self.ale.getLegalActionSet()
        if resume_state_file:
            self.loadALEState(resume_state_file)

    def saveALEState(self, fname):
        basedir = os.path.dirname(fname)
        if not os.path.exists(basedir):
            os.makedirs(basedir)
        pALEState = self.ale.cloneSystemState(
        )  # actually it returns an int, a memory address pointing to a C++ object ALEState
        serialized_np = self.ale.encodeState(
            pALEState)  # this func actually takes a pointer
        np.savez(fname,
                 state=serialized_np,
                 score=self.score,
                 episode=self.episode)

    def loadALEState(self, fname):
        npzfile = np.load(fname)
        serialized_np = npzfile['state']
        self.score = npzfile['score']
        self.episode = npzfile['episode']
        pALEState = self.ale.decodeState(
            serialized_np
        )  # actually it returns an int, a memory address pointing to a C++ object ALEState
        self.ale.restoreSystemState(
            pALEState)  # this func actually takes a pointer

    def proceed_one_step(self,
                         action,
                         refresh_screen=False,
                         fps_limit=0,
                         model_gaze_output=None,
                         gc_window_drawer_func=None):
        self.clock.tick(
            fps_limit)  # control FPS. fps_limit == 0 means no limit
        self.frame_cnt += 1

        # Display FPS
        diff_time = time.time() - self._last_time
        if diff_time > 1.0:
            print 'FPS: %.1f' % self.clock.get_fps()
            self._last_time = time.time()

        # Show game image
        cur_frame_np = self.ale.getScreenRGB()
        if refresh_screen:
            cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
            cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True,
                                                      False)
            cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
            # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
            # Slightly faster than scaling cur_frame_Surface and then transfer to screen.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)

            if gc_window_drawer_func != None and model_gaze_output:
                gc_window_drawer_func(self.screen, model_gaze_output)
            pygame.display.flip()

        # Apply an action and get the resulting reward
        reward = self.ale.act(action)
        self.score += reward

        return cur_frame_np, reward, self.check_episode_end_and_if_true_reset_game(
        )

    def proceed_one_step__fast__no_scr_support(self, action):
        self.frame_cnt += 1
        cur_frame_np = self.ale.getScreenRGB()
        reward = self.ale.act(action)
        self.score += reward
        return cur_frame_np, reward, self.check_episode_end_and_if_true_reset_game(
        )

    def check_episode_end_and_if_true_reset_game(self):
        end = self.ale.game_over()
        if end:
            print 'Episode', self.episode, 'ended with score:', self.score
            self.score = 0
            self.episode += 1
            self.ale.reset_game()
        return end  # after reset_game(),  ale.game_over()'s return value will change to false

    def run(self,
            gc_window_drawer_func=None,
            save_screen_func=None,
            event_handler_func=None,
            record_a_and_r_func=None):
        self.run_start_time = time.time()  # used in alerecord_main.py
        bool_drawgc = False  # may be set by event_handler_func inside the loop
        while True:
            self.check_episode_end_and_if_true_reset_game()
            self.clock.tick(FRAME_RATE)  # control FPS
            self.frame_cnt += 1

            key = pygame.key.get_pressed()
            if event_handler_func != None:
                stop, eyelink_err_code, bool_drawgc = event_handler_func(
                    key, self)
                if stop:
                    return eyelink_err_code

            # Display FPS
            diff_time = time.time() - self._last_time
            if diff_time > 1.0:
                print 'FPS: %.1f' % self.clock.get_fps()
                self._last_time = time.time()

            # Show game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
            cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True,
                                                      False)
            cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
            # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
            # Slightly faster than scaling cur_frame_Surface and then transfer to screen.
            pygame.transform.scale(cur_frame_Surface, self.size, self.screen)

            if gc_window_drawer_func != None and bool_drawgc:
                gc_window_drawer_func(self.screen)
            pygame.display.flip()

            # Save frame to disk (160*210, i.e. not scaled; because this is faster)
            if save_screen_func != None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            # Apply an action and get the resulting reward
            a_index = aenum.action_map(key, self.gamename)
            a = self.legal_actions[a_index]
            reward = self.ale.act(a)
            self.score += reward
            if record_a_and_r_func != None:
                record_a_and_r_func(a, reward, self.episode, self.score)

            pygame.event.pump()  # need this line to get new key pressed
        assert False, "Returning should only happen in the while True loop"

    def run_in_step_by_step_mode(self,
                                 gc_window_drawer_func=None,
                                 save_screen_func=None,
                                 event_handler_func=None,
                                 record_a_and_r_func=None):
        bool_drawgc = False
        self.run_start_time = time.time()  # used in alerecord_main.py
        while True:
            self.check_episode_end_and_if_true_reset_game()
            # Get game image
            cur_frame_np = self.ale.getScreenRGB()
            cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
            cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True,
                                                      False)
            cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)

            self.frame_cnt += 1
            # Save frame to disk (160*210, i.e. not scaled; because this is faster)
            if save_screen_func != None:
                save_screen_func(cur_frame_Surface, self.frame_cnt)

            key, draw_next_game_frame = None, False
            while not draw_next_game_frame:
                self.clock.tick(FRAME_RATE)  # control FPS

                key = pygame.key.get_pressed()
                if event_handler_func != None:
                    stop, eyelink_err_code, bool_drawgc = event_handler_func(
                        key, self)
                    if stop:
                        return eyelink_err_code
                a_index = aenum.action_map(key, self.gamename)
                # Not in all cases when action_map returns "NO OP" is the real action "NO OP",
                # Only when the human press "TAB", is the real action "NO OP".
                if (a_index == aenum.PLAYER_A_NOOP and key[pygame.K_TAB]) \
                or  a_index != aenum.PLAYER_A_NOOP:
                    draw_next_game_frame = True

                # Draw the image onto screen.
                # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
                pygame.transform.scale(cur_frame_Surface, self.size,
                                       self.screen)

                if gc_window_drawer_func != None and bool_drawgc:
                    gc_window_drawer_func(self.screen)

                pygame.display.flip()
                pygame.event.pump()  # need this line to get new key pressed

            # Apply an action and get the resulting reward
            a = self.legal_actions[a_index]
            reward = self.ale.act(a)
            self.score += reward
            if record_a_and_r_func != None:
                record_a_and_r_func(a, reward, self.episode, self.score)
        assert False, "Returning code should only be in the while True loop"
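
saveALEState and loadALEState above serialize ALE's full system state through cloneSystemState/encodeState, which makes mid-episode checkpointing possible. A round-trip sketch, illustrative only (the ROM and checkpoint paths are placeholders):

env = aleForET('breakout.bin', screen=None, rndseed=42)  # headless mode
for _ in range(100):
    env.proceed_one_step__fast__no_scr_support(env.legal_actions[0])
env.saveALEState('checkpoints/breakout.npz')  # hypothetical path
# ... later, resume exactly where the checkpoint was taken:
env.loadALEState('checkpoints/breakout.npz')
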
Example 12
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        # Set ALE configuration
        self.ale.setInt(b'frame_skip', args.frame_skip)
        self.ale.setFloat(b'repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool(b'color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt(b'random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                os.makedirs(args.record_screen_path)
            self.ale.setString(b'record_screen_dir',
                               args.record_screen_path.encode())

        if args.record_sound_filename:
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_sound_filename',
                               args.record_sound_filename.encode())

        # Load ROM
        self.ale.loadROM(rom_file.encode())

        # Set game difficulty and mode (after loading)
        self.ale.setDifficulty(args.game_difficulty)
        self.ale.setMode(args.game_mode)

        # Whether to use the minimal action set or the full legal set
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        # Life lost control
        self.life_lost = False

        # Initialize base class
        super(ALEEnvironment, self).__init__(args)

    def action_dim(self):
        return len(self.actions)

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        screen = self._get_state(self.ale.getScreenRGB())
        return screen

    def step(self, action, action_b=0, ignore_screen=False):
        lives = self.ale.lives()
        # Act on environment
        reward = self.ale.act(self.actions[action],
                              self.actions[action_b] + 18)
        # Check if life was lost
        self.life_lost = (not lives == self.ale.lives())
        # Check terminal state
        terminal = (self.ale.game_over() or self.life_lost
                    ) if self.mode == 'train' else self.ale.game_over()
        # Check if should ignore the screen (in case of RobotEnvironment)
        if ignore_screen:
            screen = None
        else:
            # Get screen from ALE
            screen = self._get_state(self.ale.getScreenRGB())
            # Wait for next frame to start
            self.fps_control.wait_next_frame()
        return screen, reward, terminal
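
A standard episode loop over the wrapper above, illustrative only: the namespace fields mirror what __init__ reads, and the Environment base class is assumed to provide self.mode and self.fps_control.

import argparse
import numpy as np

# Hypothetical configuration; field names mirror what __init__ reads.
args = argparse.Namespace(
    frame_skip=4, repeat_action_probability=0.0, color_averaging=True,
    random_seed=123, record_screen_path=None, record_sound_filename=None,
    minimal_action_set=True, game_difficulty=0, game_mode=0)

env = ALEEnvironment('breakout.bin', args)  # placeholder ROM path
screen = env.reset()
terminal = False
while not terminal:
    action = np.random.randint(env.action_dim())
    screen, reward, terminal = env.step(action)
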
Example 13
class GameEnvironment:

    def __init__(self, settings):

        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability', settings['REPEAT_ACTION_PROB'])
        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        ROM = None
        if(rom_name.endswith('.bin')):
            self.name = rom_name[:-4]
            ROM = rom_name
        else:
            self.name = rom_name
            ROM = rom_name + '.bin'

        self.ale.loadROM(os.path.join(roms_dir, ROM))
        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if(settings['MINIMAL_ACTION_SET']):
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.n_actions = len(self.actions)
        self.width, self.height = self.ale.getScreenDims()

        self.observation = np.zeros((self.height, self.width), dtype='uint8')
        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None

        self.init()

    def init(self):

        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):

        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):

        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        if(training and (lives < self.total_lives)):
            self.terminal = True

        self.total_lives = lives
        return self.getState()

    def newGame(self):

        self.init()
        for i in xrange(self.rng.randint(1, self.random_starts)):
            self.act(0)
            terminal = self.gameOver()
            if(terminal):
                print "Warning terminal in random init"

        return self.step(0)

    def newTestGame(self):

        self.init()
        return self.getState()

    def paint(self):

        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):

        return self.ale.getScreenRGB()

    def act(self, action):

        assert ((action >= 0) and (action < self.n_actions))
        return self.ale.act(self.actions[action])

    def lives(self):

        return self.ale.lives()

    def restartGame(self):

        self.ale.reset_game()

    def gameOver(self):

        return self.ale.game_over()
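
The wrapper above is configured entirely through a settings dict. A plausible configuration, with key names taken from the constructor (the values themselves are illustrative):

import numpy as np

settings = {
    'DISPLAY_SCREEN': False,
    'SOUND': False,
    'COLOR_AVERAGING': True,
    'RANDOM_SEED': 123,
    'FRAME_SKIP': 4,
    'REPEAT_ACTION_PROB': 0.0,
    'ROMS_DIR': './roms',          # placeholder directory
    'ROM_NAME': 'breakout',
    'RANDOM_STARTS': 30,
    'RNG': np.random.RandomState(123),
    'MINIMAL_ACTION_SET': True,
}
env = GameEnvironment(settings)
observation, reward, terminal, game_over = env.newGame()
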
Example 14
    def __init__(self,
                 random_seed,
                 frame_skip,
                 repeat_action_probability,
                 sound,
                 display_screen,
                 block_state_repr=None,
                 enemy_state_repr=None,
                 friendly_state_repr=None):
        ale = ALEInterface()

        # Get & Set the desired settings
        if random_seed is not None:
            ale.setInt('random_seed', random_seed)
        ale.setInt('frame_skip', frame_skip)
        ale.setFloat('repeat_action_probability', repeat_action_probability)

        if display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
            ale.setBool('sound', sound)

        ale.setBool('display_screen', display_screen)

        # Load the ROM file
        ale.loadROM('qbert.bin')

        # Get the list of legal actions
        legal_actions = ale.getLegalActionSet()
        minimal_actions = ale.getMinimalActionSet()
        logging.debug('Legal actions: {}'.format(
            [action_number_to_name(a) for a in legal_actions]))
        logging.debug('Minimal actions: {}'.format(
            [action_number_to_name(a) for a in minimal_actions]))

        width, height = ale.getScreenDims()
        rgb_screen = np.empty([height, width, 3], dtype=np.uint8)

        ram_size = ale.getRAMSize()
        ram = np.zeros(ram_size, dtype=np.uint8)

        # ALE components
        self.ale = ale
        self.lives = ale.lives()
        self.rgb_screen = rgb_screen
        self.ram_size = ale.getRAMSize()
        self.ram = ram

        # Verbose state representation
        self.desired_color = COLOR_YELLOW
        self.block_colors = INITIAL_COLORS
        self.enemies = INITIAL_ENEMY_POSITIONS
        self.friendlies = INITIAL_FRIENDLY_POSITIONS
        self.discs = INITIAL_DISCS
        self.current_row, self.current_col = 0, 0
        self.level = 1
        self.enemy_present = False
        self.friendly_present = False
        self.block_state_repr = block_state_repr
        self.enemy_state_repr = enemy_state_repr
        self.friendly_state_repr = friendly_state_repr
        self.num_colored_blocks = 0
Example 15
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        # OpenCV expects width as first and height as second
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def isTerminal(self):
        return self.ale.game_over()
Example 16
class AtariSimulator(object):

    def __init__(self, settings):

        '''Initialize the Arcade Learning Environment (ALE) using its Python interface
        https://github.com/bbitmaster/ale_python_interface/wiki

        - Set number of frames to be skipped, random seed, ROM and title for display.
        - Retrieve a set of legal actions and their number.
        - Retrieve dimensions of the original screen (width/height), and set the dimensions
        of the cropped screen, together with the padding used to crop the screen rectangle.
        - Set dimensions of the pygame display that will show visualization of the simulation.
        (May be cropped --- showing what the learner sees, or not --- showing full Atari screen)
        - Allocate memory for generated grayscale screenshots. Accepts dims in (height/width) format
        '''

        self.ale = ALEInterface()
        self.ale.setInt("frame_skip",settings["frame_skip"])
        self.ale.setInt("random_seed",settings["seed_simulator"])
        self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"])

        self.title = "ALE Simulator: " + str(settings["rom"])
        self.actions = self.ale.getLegalActionSet()
        self.n_actions = self.actions.size

        self.screen_dims = self.ale.getScreenDims()
        self.model_dims = settings['model_dims']
        self.pad = settings['pad']

        print("Original screen width/height: " + str(self.screen_dims[0]) + "/" + str(self.screen_dims[1]))
        print("Cropped screen width/height: " + str(self.model_dims[0]) + "/" + str(self.model_dims[1]))

        self.viz_cropped = settings['viz_cropped']
        if self.viz_cropped:
            self.display_dims = (int(self.model_dims[0]*2), int(self.model_dims[1]*2))
        else:
            self.display_dims = (int(self.screen_dims[0]*2), int(self.screen_dims[1]*2))

        # preallocate an array to accept ALE screen data (height/width) !
        self.screen_data = np.empty((self.screen_dims[1],self.screen_dims[0]),dtype=np.uint8)


    def get_screenshot(self):
        '''returns a cropped snapshot of the simulator
        - store grayscale values in a preallocated array
        - cut out a square from the rectangle, using provided padding value
        - downsample to the desired size and transpose from (height/width) to (width/height)
        '''

        self.ale.getScreenGrayscale(self.screen_data)
        self.tmp = self.screen_data[(self.screen_dims[1]-self.screen_dims[0]-self.pad):(self.screen_dims[1]-self.pad),:]
        self.frame = spm.imresize(self.tmp,self.model_dims[::-1],interp='nearest').T #, interp='nearest'

        return self.frame


    def act(self,action_index):
        '''function to transition the simulator from s to s' using provided action
        the action that is provided is in form of an index
        simulator deals with translating the index into an actual action'''

        self.last_reward = self.ale.act(self.actions[action_index])


    def reward(self):
        '''return reward - has to be called after the "act" function'''

        return self.last_reward


    def episode_over(self):
        '''return a boolean indicator on whether the game is still running'''

        return self.ale.game_over()
        

    def reset_episode(self):
        '''reset the game that ended'''

        self.ale.reset_game()


    def init_viz_display(self):
        '''initialize display that will show visualization'''

        pygame.init()
        self.screen = pygame.display.set_mode(self.display_dims)
        if self.title:
            pygame.display.set_caption(self.title)


    def refresh_viz_display(self):
        '''if display is shut down, shut the game down
        else move the current simulator's frame (cropped or not cropped) into the pygame display,
        after expanding it 2x along x and y dimensions'''

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit()

        if self.viz_cropped:
            self.surface = pygame.surfarray.make_surface(self.frame) # has already been transposed
        else:
            self.surface = pygame.surfarray.make_surface(self.screen_data.T)

        self.screen.blit(pygame.transform.scale2x(self.surface),(0,0))
        pygame.display.flip()
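
A random-play loop with live visualization for the simulator above, illustrative only (the settings values are placeholders for the keys the constructor reads):

import numpy as np

settings = {
    'frame_skip': 4, 'seed_simulator': 123,
    'rom_dir': './roms', 'rom': 'breakout.bin',   # placeholder paths
    'model_dims': (84, 84), 'pad': 15, 'viz_cropped': True,
}
sim = AtariSimulator(settings)
sim.init_viz_display()
while not sim.episode_over():
    sim.act(np.random.randint(sim.n_actions))
    sim.get_screenshot()       # fills sim.frame for the cropped display
    sim.refresh_viz_display()
sim.reset_episode()
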
Example 17
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        return len(self.actions)

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
Example 18
class ALEEnvironment():
    def __init__(self, rom_file, args):

        self.ale = ALEInterface()
        self.histLen = 4

        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        #self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.mode = "train"
        self.life_lost = False
        self.initScreen = self.getScreen()
        self.goalSet = []
        self.goalSet.append([[70, 65], [74, 71]])  # lower right ladder 4
        self.goalSet.append([[11, 58], [15, 66]])  # lower left ladder 3
        self.goalSet.append([[7, 41], [11, 45]])  # key 5
        self.goalCenterLoc = []
        for goal in self.goalSet:
            goalCenter = [
                float(goal[0][0] + goal[1][0]) / 2,
                float(goal[0][1] + goal[1][1]) / 2
            ]
            self.goalCenterLoc.append(goalCenter)
        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.reachedGoal = [0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        histState = np.concatenate((self.getState(), self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        return histState

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0]

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def beginNextLife(self):
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        currState = self.getState()
        self.histState = np.concatenate((self.histState[:, :, 1:], currState),
                                        axis=2)
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getScreenRGB(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        #resized = screen
        return resized

    def getAgentLoc(self):
        img = self.getScreenRGB()
        man = [200, 72, 72]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = man[0]
        mask[:, :, 1] = man[1]
        mask[:, :, 2] = man[2]

        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if (np.shape(indxs[0])[0] == 0):
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        if (lastGoal == -1):
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.getAgentLoc()
        dis = np.sqrt((goalCenter[0] - agentX) * (goalCenter[0] - agentX) +
                      (goalCenter[1] - agentY) * (goalCenter[1] - agentY))
        disLast = np.sqrt((lastGoalCenter[0] - agentX) *
                          (lastGoalCenter[0] - agentX) +
                          (lastGoalCenter[1] - agentY) *
                          (lastGoalCenter[1] - agentY))
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) *
                           (goalCenter[0] - lastGoalCenter[0]) +
                           (goalCenter[1] - lastGoalCenter[1]) *
                           (goalCenter[1] - lastGoalCenter[1]))
        return 0.001 * (disLast - dis) / disGoals

    # add color channel for input of network
    def getState(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return np.reshape(resized, (self.screen_height, self.screen_width, 1))

    def getStackedState(self):
        return self.histState

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        return self.ale.game_over()

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        goalPosition = self.goalSet[goal]
        goalScreen = self.initScreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is total number of pixels of agent
        if float(count) / 30 > 0.3:
            self.reachedGoal[goal] = 1
            return True
        return False

    def goalNotReachedBefore(self, goal):
        if (self.reachedGoal[goal] == 1):
            return False
        return True
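
A subgoal-driven loop over the wrapper above, illustrative only: the namespace fields mirror what __init__ reads, and the ROM path is a placeholder.

import argparse
import numpy as np

# Hypothetical configuration; field names mirror what __init__ reads.
args = argparse.Namespace(
    display_screen=False, frame_skip=4, color_averaging=True,
    random_seed=123, minimal_action_set=True,
    screen_width=84, screen_height=84)

env = ALEEnvironment('montezuma_revenge.bin', args)
env.restart()
goal = 0
while not env.isTerminal() and goal < len(env.goalSet):
    env.act(np.random.randint(env.numActions()))
    if env.goalNotReachedBefore(goal) and env.goalReached(goal):
        goal += 1  # advance to the next subgoal in goalSet
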
Example 19
def make_dataset(extension='.png'):
    if len(sys.argv) < 3:
        print('Usage: %s rom_file num_games' % sys.argv[0])
        sys.exit()

    ale = ALEInterface()

    # set seed for reproducibility
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # load the ROM file
    rom_file = str.encode(sys.argv[1])
    ale.loadROM(rom_file)

    # get number of runs
    num_games = int(sys.argv[2])

    # set RGB flag (note: bool() of any non-empty string is True,
    # so parse the argument explicitly)
    rgb = False
    if len(sys.argv) == 4:
        rgb = sys.argv[3].lower() in ('1', 'true', 'yes')

    # get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # make recording directory
    import os
    if not os.path.exists('./record/'):
        os.makedirs('./record/')
    if not os.path.exists('./record/train/'):
        os.makedirs('./record/train/')
    if not os.path.exists('./record/test/'):
        os.makedirs('./record/test/')

    # initialise iteration counter
    iter = 0

    # play game
    for episode in range(num_games):
        total_reward = 0
        while not ale.game_over():
            if np.mod(iter, 2) == 0:
                screenshot_odd = ale.getScreenRGB()
            else:
                # take current screenshot as the maximum of last two
                screenshot = np.maximum(ale.getScreenRGB(), screenshot_odd)
                # pre-process image
                screenshot = __pre_process(screenshot, rgb=rgb)
                # save screenshot in appropriate directory
                __save_image(screenshot, iter // 2, extension=extension)
            # select random action
            a = legal_actions[randrange(len(legal_actions))]
            # apply an action and get the resulting reward
            reward = ale.act(a)
            # increment award
            total_reward += reward
            # increment iteration counter
            iter += 1
        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
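
__pre_process is not defined in this snippet; a plausible stand-in, purely an assumption in the spirit of the usual DQN preprocessing (grayscale conversion plus 84x84 downsampling), could look like this:

import cv2

def __pre_process(screenshot, rgb=False):
    # Hypothetical helper: keep RGB or convert to grayscale, then
    # downsample to 84x84; the real implementation is not shown above.
    if not rgb:
        screenshot = cv2.cvtColor(screenshot, cv2.COLOR_RGB2GRAY)
    return cv2.resize(screenshot, (84, 84), interpolation=cv2.INTER_AREA)
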
Example 20
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 1
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)

        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        #  self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.action_count = 0
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()
        self.framerate = 60  # Should read from ALE settings technically
        self.samples_per_frame = 512  # Should read from ALE SoundExporter class technically
        self.audio_freq = self.framerate * self.samples_per_frame  #/self.frame_skip
        self.all_audio = np.zeros((0, ), dtype=np.uint8)

        # Saving audio/video to disk for verification.
        self.save_to_file = True  # NOTE set to False to test actual screen/audio query speed!
        if self.save_to_file:
            self.save_dir_av = './logs_av_seq_Example'  # Save png sequence and audio wav file here
            self.save_dir_movies = './log_movies_Example'
            self.save_image_prefix = 'image_frames'
            self.save_audio_filename = 'audio_user_recorder.wav'
            self.create_save_dir(self.save_dir_av)

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)

    def create_save_dir(self, directory):
        # Remove previous img/audio image logs
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)

            # Also supports independent audio queries if user desires:
            #  self.ale.getAudio(np_data_audio)
        else:
            #  np_data_audio = 0
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getAudio(np_data_audio)
            self.ale.getScreenRGB(np_data_image)

        return np.reshape(np_data_image,
                          (self.screen_height, self.screen_width,
                           3)), np.asarray(np_data_audio)

    def audio_to_mfcc(self, audio):
        mfcc_data = mfcc(signal=audio,
                         samplerate=self.audio_freq,
                         winlen=0.002,
                         winstep=0.0006)
        mfcc_data = np.swapaxes(mfcc_data, 0, 1)  # Time on x-axis

        # Normalization
        min_data = np.min(mfcc_data.flatten())
        max_data = np.max(mfcc_data.flatten())
        mfcc_data = (mfcc_data - min_data) / (max_data - min_data)

        return mfcc_data

    def save_image(self, image):
        number = str(self.action_count).zfill(6)
        scipy.misc.imsave(
            os.path.join(self.save_dir_av,
                         self.save_image_prefix + number + '.png'), image)

    def save_audio(self, audio):
        wavfile.write(os.path.join(self.save_dir_av, self.save_audio_filename),
                      self.audio_freq, audio)

    def save_movie(self, movie_name):
        # Use ffmpeg to convert the saved img sequences and audio to mp4

        # Video recording
        command = [
            "ffmpeg",
            '-y',  # overwrite output file if it exists
            '-r',
            str(self.framerate),  # frames per second
            '-i',
            os.path.join(self.save_dir_av, self.save_image_prefix +
                         '%06d.png')  # Video input comes from pngs; zero-padded to match zfill(6)
        ]

        # Audio if available
        if self.record_sound_for_user:
            command.extend([
                '-i',
                os.path.join(self.save_dir_av, self.save_audio_filename)
            ])  # Audio input comes from wav

        # Codecs and output
        command.extend([
            '-c:v',
            'libx264',  # Video codec
            '-c:a',
            'mp3',  # Audio codec
            os.path.join(self.save_dir_movies,
                         movie_name + '.mp4')  # Output dir
        ])

        # Make movie dir and write the mp4
        if not os.path.exists(self.save_dir_movies):
            os.makedirs(self.save_dir_movies)
        sp.call(
            command
        )  # NOTE: needs ffmpeg! Will throw 'dir doesn't exist err' otherwise.

    def concat_image_audio(self, image, audio_mfcc):
        # Concatenates image and audio to test sync'ing in saved .mp4
        audio_mfcc = scipy.misc.imresize(audio_mfcc, np.shape(
            image))  # Resize MFCC image to be same size as screen image
        cmap = plt.get_cmap('viridis')  # Apply a colormap to spectrogram
        audio_mfcc = (np.delete(cmap(audio_mfcc), 3, 2) * 255.).astype(
            np.uint8)  # Gray MFCC -> 4 channel colormap -> 3 channel colormap
        image = np.concatenate((image, audio_mfcc),
                               axis=1)  # Concat screen image and MFCC image
        return image

    def plot_mfcc(self, audio_mfcc):
        plt.clf()
        plt.imshow(audio_mfcc,
                   interpolation='bilinear',
                   cmap=plt.get_cmap('viridis'))
        plt.pause(0.001)
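A hedged driver sketch for the Atari wrapper above: step with random actions, collect frames and audio, then write the movie. The ROM path, the step count, and the external increment of action_count are illustrative assumptions, not part of the original.
atari = Atari('./roms/breakout.bin')  # assumed ROM path
audio_chunks = []
for _ in range(600):  # roughly 10 seconds at 60 fps
    atari.take_action()
    image, audio = atari.get_image_and_audio()
    audio_chunks.append(audio)
    if atari.save_to_file:
        atari.save_image(image)
    atari.action_count += 1  # save_image() numbers frames by action_count
if atari.save_to_file:
    atari.save_audio(np.concatenate(audio_chunks))
    atari.save_movie('demo')  # requires ffmpeg on PATH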
Esempio n. 21
0
class ALEEnvironment(Environment):
    """
    A environment wrapper for the ALE environment
    """
    def __init__(self, rom_name, visible=True):
        super().__init__('Arcade Learning Environment')

        frame_skip = 20

        self._ale = ALEInterface()
        self._ale_sampler = ALEInterface()

        self._ale.setBool(b'display_screen', visible)
        #self._ale.setInt(b'frame_skip', frame_skip)

        #self._ale_sampler.setBool(b'display_screen', True)
        #self._ale_sampler.setInt(b'frame_skip', frame_skip)

        self._ale.loadROM(rom_name.encode('ascii'))
        self._ale_sampler.loadROM(rom_name.encode('ascii'))
        self._action_space = self._ale.getLegalActionSet()
        self._current_score = 0

    def evaluate_rollout(self, solution, discount_factor=0):
        #temp_state = self._ale.cloneState()

        temp_ale = self._ale.encodeState(self._ale.cloneState())
        temp_state = self._ale_sampler.decodeState(temp_ale)
        self._ale_sampler.restoreState(temp_state)

        prev_lives = self._ale.lives()
        total_rollout_reward = 0
        discount = 1
        for action in solution:
            rollout_reward = self._ale_sampler.act(action)

            if discount_factor is not None:
                rollout_reward *= discount
                discount *= discount_factor

            total_rollout_reward += rollout_reward

            if self._ale_sampler.game_over():
                break

        score_delta = total_rollout_reward + (self._ale_sampler.lives() -
                                              prev_lives)

        #self._ale.restoreState(temp_state)

        return score_delta

    def perform_action(self, action):
        reward = self._ale.act(action)
        self._current_score += reward

    def get_current_score(self):
        return self._current_score

    def get_current_lives(self):
        return self._ale.lives()

    def get_random_action(self):
        return np.random.choice(self._action_space)

    def is_game_over(self):
        return self._ale.game_over()
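Before main(), a hedged sketch of how this two-emulator setup can be driven: sample random action sequences on the cloned state, then commit the first action of the best rollout on the primary emulator. The rollout counts and discount below are illustrative assumptions.
def random_rollout_step(env, n_rollouts=8, horizon=15):
    best_score, best_first_action = -float('inf'), None
    for _ in range(n_rollouts):
        solution = [env.get_random_action() for _ in range(horizon)]
        score = env.evaluate_rollout(solution, discount_factor=0.95)
        if score > best_score:
            best_score, best_first_action = score, solution[0]
    env.perform_action(best_first_action)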
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/K-P/ms_pacman.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)

        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Complete loading ROM')

    (game_surface_width, game_surface_height) = ale.getScreenDims()
    print("game surface width/height: " + str(game_surface_width) + "/" +
          str(game_surface_height))

    (display_width, display_height) = (800, 640)
    print('display width/height', (display_width, display_height))

    available_action = ale.getLegalActionSet()
    print(available_action)

    # init pygame
    pygame.init()
    display_screen = pygame.display.set_mode((display_width, display_height))
    pygame.display.set_caption(
        "Arcade Learning Environment Player Agent Display")

    # init clock
    clock = pygame.time.Clock()
    is_exit = False

    # Play 10 episodes
    for episode in range(10):
        if is_exit:
            break

        total_reward = 0

        while not ale.game_over() and not is_exit:

            a = getActionFromKeyboard()
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
            # clear screen
            display_screen.fill((0, 0, 0))
            # render game surface
            renderGameSurface(ale, display_screen,
                              (game_surface_width, game_surface_height))
            # display related info
            displayRelatedInfo(display_screen, a, total_reward)

            pygame.display.flip()

            # process pygame event queue
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    is_exit = True
                    break
                if event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                    is_exit = True
                    break

            # delay to 60fps
            clock.tick(60.)

        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
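main() relies on getActionFromKeyboard, renderGameSurface, and displayRelatedInfo defined elsewhere in the module. As a hedged illustration only, the keyboard helper could map pygame keys to the standard ALE action codes like this (the mapping is an assumption):
def getActionFromKeyboard():
    # Hypothetical sketch; codes follow the standard ALE action enumeration.
    keys = pygame.key.get_pressed()
    if keys[pygame.K_SPACE]:
        return 1  # FIRE
    if keys[pygame.K_UP]:
        return 2  # UP
    if keys[pygame.K_RIGHT]:
        return 3  # RIGHT
    if keys[pygame.K_LEFT]:
        return 4  # LEFT
    if keys[pygame.K_DOWN]:
        return 5  # DOWN
    return 0  # NOOP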
Esempio n. 23
0
class GameManager(object):
    """This class takes care of the interactions between an agent and
    a game across episodes, as well as overall logging of performance.
    """

    def __init__(
        self,
        game_name,
        agent,
        results_dir,
        n_epochs=1,
        n_episodes=None,
        n_frames=None,
        remove_old_results_dir=False,
        use_minimal_action_set=True,
        min_time_between_frames=0,
    ):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames

        if (n_episodes is None and n_frames is None) or (n_episodes is not None and n_frames is not None):
            raise ValueError("Extacly one of n_episodes and n_frames " "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"),
        )

        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True,
        )

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()

        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)

        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)

        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen, self.get_screen_grayscale, self.get_screen_RGB, self.get_RAM)

        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)

        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()

        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is 
            defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        for epoch in range(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0

        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start
        frames = self.ale.getFrameNumber()

        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()

            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)

            total_reward += reward

            self.rest(time() - timestep_start)

        wall_time = time() - episode_start
        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(
            self.n_epoch, self.n_episode, total_reward, self.ale.getEpisodeFrameNumber(), "{:.2f}".format(wall_time)
        )

    def _stop_condition_met(self):
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it 
        will overwrite what is in the old object"""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. 
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours. 
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object"""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()

        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to 
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
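The Agent interface is only implicit in the calls GameManager makes. A minimal random agent satisfying those calls might look like this sketch (method names inferred from the usage above; not the project's actual Agent base class):
import random

class RandomAgent(object):
    def set_raw_state_callbacks(self, callbacks):
        self.callbacks = callbacks
    def set_results_dir(self, results_dir):
        self.results_dir = results_dir
    def set_available_actions(self, actions):
        self.actions = actions
    def reset(self):
        pass
    def on_episode_start(self):
        pass
    def select_action(self):
        return random.choice(self.actions)
    def receive_reward(self, reward):
        pass
    def on_episode_end(self):
        pass
    def get_settings(self):
        return {'name': 'RandomAgent'}

# e.g. GameManager('space_invaders.bin', RandomAgent(), 'results', n_episodes=10).run()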
Esempio n. 24
0
def forward(obs, return_all=False):
    # renamed from `input`/`all` to avoid shadowing Python builtins
    actionValues = sess.run(y, feed_dict={x: obs})
    if return_all:
        return actionValues
    actionValue_max = np.max(actionValues)
    index = np.argmax(actionValues, axis=1)
    return [index, actionValue_max]





ale = ALEInterface()

# Get & Set the desired settings (these must be applied before loadROM to take effect)
ale.setInt('random_seed', 123)
frameSkip = 4  # assumed value; the original snippet leaves frameSkip undefined
ale.setInt('frame_skip', frameSkip)

ale.loadROM("Breakout.A26")
legal_actions = ale.getLegalActionSet()
img = ale.getScreen()
actionIndex, actionValue = forward(img)
reward = ale.act(legal_actions[actionIndex[0]])


# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
  if sys.platform == 'darwin':
Esempio n. 25
0
class ALEEnvironment():
    def __init__(self, rom_file, args):

        self.ale = ALEInterface()
        self.histLen = 4

        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setBool('color_averaging', args.color_averaging)

        #if args.random_seed:
        #  self.ale.setInt('random_seed', args.random_seed)
        self.ale.setInt(
            'random_seed',
            0)  #hoang addition to fix the random seed across all environment
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.mode = "train"
        self.life_lost = False
        self.initSrcreen = self.getScreen()
        print("size of screen is:", self.initSrcreen.shape)
        im = Image.fromarray(self.initSrcreen)
        im.save('initial_screen.jpeg')
        im = Image.open('initial_screen.jpeg')
        pix = im.load()
        #  print "devil's color", pix[13,62]
        #  print "agent's color", pix[42,33]
        #  exit()

        # draw = ImageDraw.Draw(im)
        #  draw.rectangle([(37, 29),(48, 37)], outline = 'red')
        #   draw.rectangle([(69, 68), (73, 71)], outline = 'white')
        #   draw.rectangle([(7, 41), (11, 45)], outline = 'white')
        #   draw.rectangle([(11, 58), (15, 66)], outline = 'white')
        #   draw.rectangle([(70, 20), (73, 35)], outline='white') #right door
        #   draw.rectangle([(11, 68), (15, 71)], outline='white')
        #   im.save('first_subgoal_box.jpeg')
        #  exit()
        # use this tool to get bounding box: http://nicodjimenez.github.io/boxLabel/annotate.html
        self.goalSet = []
        # goal 0
        self.goalSet.append([[69, 68], [
            73, 71
        ]])  # Lower Right Ladder. This is the box for detecting first subgoal
        # self.goalSet.append([[11, 58], [15, 66]]) # lower left ladder 3
        # self.goalSet.append([[11, 68], [15, 71]])  # lower left ladder 3
        # goal 2
        self.goalSet.append([[7, 41],
                             [11, 45]])  # Key. This will be second sub goal

        self.goalSet.append([[11, 68], [15, 71]])  # lower left ladder 3
        # goal 4
        self.goalSet.append(
            [[69, 68],
             [73,
              71]])  # Lower Right Ladder again, this will be the third subgoal
        # goal 6
        self.goalSet.append([[70, 20],
                             [73,
                              35]])  # Right Door. This will be the 4th subgoal
        self.goalCenterLoc = []
        for goal in self.goalSet:
            goalCenter = [
                float(goal[0][0] + goal[1][0]) / 2,
                float(goal[0][1] + goal[1][1]) / 2
            ]
            self.goalCenterLoc.append(goalCenter)
        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.devilLastX = 0
        self.devilLastY = 0
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        histState = np.concatenate((self.getState(), self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        return histState

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]  # keep in sync with the 7-entry flag list used elsewhere

    def restart(self):
        self.ale.reset_game()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    """
  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
                  self.mode == 'test' or
                not self.life_lost or  # `reset` called in a middle of episode
              self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False
    self.reachedGoal = [0, 0, 0]
    for i in range(19):
      self.act(0) #wait for initialization
    self.histState = self.initializeHistState()
    self.agentLastX = self.agentOriginLoc[0]
    self.agentLastY = self.agentOriginLoc[1]
  """

    def beginNextLife(self):
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0, 0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (lives != self.ale.lives())
        currState = self.getState()
        self.histState = np.concatenate((self.histState[:, :, 1:], currState),
                                        axis=2)
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getScreenRGB(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        #resized = screen
        return resized

    def getAgentLoc(self, img):
        #  img = self.getScreenRGB()

        man = [200, 72, 72]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = man[0]
        mask[:, :, 1] = man[1]
        mask[:, :, 2] = man[2]

        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if (np.shape(indxs[0])[0] == 0):
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def getDevilLoc(self, img):
        #    img = self.getScreenRGB()
        # man = [0, 16, 2]
        devilColor = [236, 236, 236]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = devilColor[0]
        mask[:, :, 1] = devilColor[1]
        mask[:, :, 2] = devilColor[2]
        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if (np.shape(indxs[0])[0] == 0):
            mean_x = self.devilLastX
            mean_y = self.devilLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.devilLastX = mean_x
        self.devilLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        if (lastGoal == -1):
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.getAgentLoc(self.getScreenRGB())  # getAgentLoc requires the current RGB frame
        dis = np.sqrt((goalCenter[0] - agentX) * (goalCenter[0] - agentX) +
                      (goalCenter[1] - agentY) * (goalCenter[1] - agentY))
        disLast = np.sqrt((lastGoalCenter[0] - agentX) *
                          (lastGoalCenter[0] - agentX) +
                          (lastGoalCenter[1] - agentY) *
                          (lastGoalCenter[1] - agentY))
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) *
                           (goalCenter[0] - lastGoalCenter[0]) +
                           (goalCenter[1] - lastGoalCenter[1]) *
                           (goalCenter[1] - lastGoalCenter[1]))
        return 0.001 * (disLast - dis) / disGoals

    # add color channel for input of network
    def getState(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return np.reshape(resized, (84, 84, 1))

    def getStackedState(self):
        return self.histState

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        return self.ale.game_over()

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        # if goal in [0,2,4,6]: # those are original task where bounding boxes are used to detect the location of agents
        subset = [
            0, 2, 3, 4, 6
        ]  # the original subgoals where a bounding box detects the agent's location
        if goal in subset:
            # goal_index = goal/2
            goal_index = subset.index(goal)
            goalPosition = self.goalSet[goal_index]
            goalScreen = self.initSrcreen
            stateScreen = self.getScreen()
            count = 0
            for y in range(goalPosition[0][0], goalPosition[1][0]):
                for x in range(goalPosition[0][1], goalPosition[1][1]):
                    if goalScreen[x][y] != stateScreen[x][y]:
                        count = count + 1
            # 30 is total number of pixels of agent
            if float(count) / 30 > 0.3:
                self.reachedGoal[goal] = 1
                return True
        if goal == 1:
            # detect if agent is to the left of the devil
            #    return self.agent_left_devil()
            return self.detect_left_ladder()
        ############## -- DML modified -- ###########
        # if goal == 4:
        #     # detect if agent is to the right of the devil
        # #    return self.agent_right_devil()
        #     return self.detect_right_ladder()
        ################# -- end -- ###########
        if goal == 5:
            # detect if the agent is back to the original location
            return self.original_location_reached()
        return False

    def detect_right_ladder(self):
        goalPosition = self.goalSet[0]
        goalScreen = self.initSrcreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is total number of pixels of agent
        if float(count) / 30 > 0.3:
            goal = 5
            self.reachedGoal[goal] = 1
            return True
        return False

    def detect_left_ladder(self):
        goalPosition = self.goalSet[2]
        goalScreen = self.initSrcreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is total number of pixels of agent
        if float(count) / 30 > 0.3:
            goal = 5
            self.reachedGoal[goal] = 1
            return True
        return False

    def original_location_reached(self):
        img = self.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        #  print "Agent's location:",x,y
        if abs(x - 42) <= 2 and abs(y - 33) <= 2:
            return True
        else:
            return False

    def pause(self):
        os.system('read -s -n 1 -p "Press any key to continue...\n"')

    def agent_left_devil(self):
        img = self.ale.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        (a, b) = self.getDevilLoc(img)
        #  print "Agent's location:",x,y
        #  print "Devil's location:", a,b
        if (a - x > 40) and (abs(y - b) <= 40):
            return True
        else:
            return False

    def agent_right_devil(self):
        img = self.getScreenRGB()
        (x, y) = self.getAgentLoc(img)
        (a, b) = self.getDevilLoc(img)
        # print "Agent's location:",x,y
        # print "Devil's location:",a,b

        # if (x-a > 25) and (abs(y-b) <= 40):
        if (x - a > 40) and (abs(y - b) <= 40):
            return True
        else:
            return False

    def goalNotReachedBefore(self, goal):
        if (self.reachedGoal[goal] == 1):
            return False
        return True
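goalReached, detect_right_ladder, and detect_left_ladder above repeat the same pixel-difference count over a bounding box. A hedged refactoring sketch of that shared step, vectorized with NumPy and using the [[x0, y0], [x1, y1]] box format from goalSet:
def box_changed_fraction(goal_screen, state_screen, box, agent_pixels=30.0):
    # Fraction of the agent's ~30 pixels that changed inside the box.
    (x0, y0), (x1, y1) = box
    diff = goal_screen[y0:y1, x0:x1] != state_screen[y0:y1, x0:x1]
    return np.count_nonzero(diff) / agent_pixels

# e.g. reached = box_changed_fraction(self.initSrcreen, self.getScreen(),
#                                     self.goalSet[0]) > 0.3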
class aleForET:
    def __init__(self,rom_file, screen):
        self.screen = screen

        pygame.init()

        self.ale = ALEInterface()
        GAME_W, GAME_H = 160, 210

        # Setting up the pygame screen Surface
        self.size = GAME_W * V.xSCALE, GAME_H * V.ySCALE

        # Get & Set the desired settings
        self.ale.setInt('random_seed', randint(0,200))
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', False)
        self.ale.setBool('color_averaging', False)
        self.ale.setFloat('repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(rom_file)
        self.gamename = os.path.basename(rom_file).split('.')[0]

        # Get the list of legal actions
        self.legal_actions = self.ale.getLegalActionSet()

    def run(self, gc_window_drawer_func = None, save_screen_func = None, event_handler_func = None, record_a_and_r_func = None):
        last_time=time.time()
        frame_cnt=0
        clock = pygame.time.Clock()
        # Play 10 episodes
        for episode in range(EPISODES):
            total_reward = 0
            while not self.ale.game_over():
                clock.tick(FRAME_RATE) # control FPS
                frame_cnt+=1

                key = pygame.key.get_pressed()
                if event_handler_func is not None:
                    stop, eyelink_err_code, bool_drawgc = event_handler_func(key)
                    if stop:
                        return eyelink_err_code

                # Display FPS
                diff_time = time.time() - last_time
                if diff_time > 1.0:
                    print('FPS: %.1f' % clock.get_fps())
                    last_time = time.time()

                # Show game image
                cur_frame_np = self.ale.getScreenRGB()
                cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
                cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True, False)
                cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)
                # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
                # Slightly faster than scaling cur_frame_Surface and then transfer to screen.
                pygame.transform.scale(cur_frame_Surface, self.size, self.screen)

                if gc_window_drawer_func is not None and bool_drawgc:
                    gc_window_drawer_func(self.screen)
                pygame.display.flip()

                # Save frame to disk (160*210, i.e. not scaled; because this is faster)
                if save_screen_func is not None:
                    save_screen_func(cur_frame_Surface, frame_cnt)

                # Apply an action and get the resulting reward
                a_index = aenum.action_map(key, self.gamename)
                a = self.legal_actions[a_index]
                reward = self.ale.act(a)
                total_reward += reward
                if record_a_and_r_func is not None:
                    record_a_and_r_func(a, reward)

                pygame.event.pump() # need this line to get new key pressed

            print('Episode', episode, 'ended with score:', total_reward)
            self.ale.reset_game()

        TRIAL_OK = 0 # copied from EyeLink's constant
        return TRIAL_OK

    def run_in_step_by_step_mode(self, gc_window_drawer_func = None, save_screen_func = None, event_handler_func = None, record_a_and_r_func = None):
        frame_cnt=0
        bool_drawgc = False
        clock = pygame.time.Clock()
        # Play 10 episodes
        for episode in range(10):
            total_reward = 0
            while not self.ale.game_over():
                # Get game image
                cur_frame_np = self.ale.getScreenRGB()
                cur_frame_Surface = pygame.surfarray.make_surface(cur_frame_np)
                cur_frame_Surface = pygame.transform.flip(cur_frame_Surface, True, False)
                cur_frame_Surface = pygame.transform.rotate(cur_frame_Surface, 90)

                frame_cnt+=1
                # Save frame to disk (160*210, i.e. not scaled; because this is faster)
                if save_screen_func is not None:
                    save_screen_func(cur_frame_Surface, frame_cnt)

                key, draw_next_game_frame = None, False
                while not draw_next_game_frame:
                    clock.tick(FRAME_RATE) # control FPS

                    key = pygame.key.get_pressed()
                    if event_handler_func is not None:
                        stop, eyelink_err_code, bool_drawgc = event_handler_func(key)
                        if stop:
                            return eyelink_err_code
                    a_index = aenum.action_map(key, self.gamename)
                    # Not in all cases when action_map returns "NO OP" is the real action "NO OP",
                    # Only when the human press "TAB", is the real action "NO OP".
                    if (a_index == aenum.PLAYER_A_NOOP and key[pygame.K_TAB]) \
                    or  a_index != aenum.PLAYER_A_NOOP:
                        draw_next_game_frame = True

                    # Draw the image onto screen.
                    # Perform scaling directly on screen, leaving cur_frame_Surface unscaled.
                    pygame.transform.scale(cur_frame_Surface, self.size, self.screen)

                    if gc_window_drawer_func is not None and bool_drawgc:
                        gc_window_drawer_func(self.screen)

                    pygame.display.flip()
                    pygame.event.pump() # need this line to get new key pressed

                # Apply an action and get the resulting reward
                a = self.legal_actions[a_index]
                reward = self.ale.act(a)
                print("step reward: ", reward)
                total_reward += reward
                if record_a_and_r_func is not None:
                    record_a_and_r_func(a, reward)

            print('Episode', episode, 'ended with score:', total_reward)
            self.ale.reset_game()

        TRIAL_OK = 0 # copied from EyeLink's constant
        return TRIAL_OK
Esempio n. 27
0
class AleAgent:
    ##
    # @param processing_cls Class for processing game visual input
    def __init__(self,
                 processing_cls,
                 game_rom=None,
                 encoder_model=None,
                 encoder_weights=None,
                 NFQ_model=None,
                 NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model,
                                   path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)  # no sound

            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(self.encoder.out_dim,
                           len(self.minimal_actions),
                           model_path=NFQ_model,
                           weights_path=NFQ_weights)
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros((self.screen_height, self.screen_width),
                                    dtype=np.uint8)

    ##
    # Initialize the reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in range(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print('Starting episode:', episode + 1)

            if key_binding:
                eps = 0.05
            else:
                eps -= 2 / num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)
            while not self.game.game_over():
                current_state = next_state
                x = None

                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]
                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)
                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print('Epsilon:', eps)
            print('Episode', episode + 1, 'ended with score:', total_reward)
            print('Hits:', hits)
            self.game.reset_game()
            self.NFQ.train()
            hits = 0
            moves = 0
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    def play(self):
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)

            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1

        print('The game ended with score:', total_reward, 'after:', moves, 'moves')
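A hedged end-to-end driver for AleAgent; the processing class and ROM path below are illustrative assumptions (the real processing_cls, Encoder, and NFQ are project-specific):
class IdentityProcessor(object):
    def process(self, screen_data):
        return screen_data  # no pooling; stands in for the project's processor

agent = AleAgent(IdentityProcessor, game_rom='./roms/breakout.bin')  # assumed path
agent.train(num_of_episodes=100)
agent.play()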
Esempio n. 28
0
class Emulate:
    def __init__(self,
                 rom_file,
                 display_screen=False,
                 frame_skip=4,
                 screen_height=84,
                 screen_width=84,
                 repeat_action_probability=0,
                 color_averaging=True,
                 random_seed=0,
                 record_screen_path='screen_pics',
                 record_sound_filename=None,
                 minimal_action_set=True):
        self.ale = ALEInterface()
        if display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool('color_averaging', color_averaging)

        if random_seed:
            self.ale.setInt('random_seed', random_seed)

        self.ale.loadROM(rom_file)

        if minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.dims = (screen_width, screen_height)

    def numActions(self):
        return len(self.actions)

    def getActions(self):
        return self.actions

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def getScreenGray(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def getScreenColor(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def isTerminal(self):
        return self.ale.game_over()
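A short random-play sketch against the Emulate wrapper above (the ROM path is an assumption):
import random

env = Emulate('./roms/breakout.bin')  # assumed ROM path
total_reward = 0
while not env.isTerminal():
    total_reward += env.act(random.randrange(env.numActions()))
print('Score: %d' % total_reward)
env.restart()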
Esempio n. 29
0
class GameManager(object):
    """This class takes care of the interactions between an agent and
    a game across episodes, as well as overall logging of performance.
    """
    def __init__(self,
                 game_name,
                 agent,
                 results_dir,
                 n_epochs=1,
                 n_episodes=None,
                 n_frames=None,
                 remove_old_results_dir=False,
                 use_minimal_action_set=True,
                 min_time_between_frames=0):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames

        if ((n_episodes is None and n_frames is None)
                or (n_episodes is not None and n_frames is not None)):
            raise ValueError("Extacly one of n_episodes and n_frames "
                             "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ('settings', 'step', 'episode', 'epoch', 'overall'), 'settings',
            os.path.join(self.results_dir, 'GameManager.log'))

        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, 'stats.log'),
            header='epoch,episode,total_reward,n_frames,wall_time',
            print_items=True)

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()

        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime('%Y%m%d-%H-%M')
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)

        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)

        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        RSC = namedtuple('RawStateCallbacks', ['raw', 'grey', 'rgb', 'ram'])
        raw_state_callbacks = RSC(self.get_screen, self.get_screen_grayscale,
                                  self.get_screen_RGB, self.get_RAM)

        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)

        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()

        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is 
            defined.
        """
        self.log.overall('Starting run')
        run_start = time()
        for epoch in range(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall('End of run ({:.2f} s)'.format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0

        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = (time() - start)
        frames = self.ale.getFrameNumber()

        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()

            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)

            total_reward += reward

            self.rest(time() - timestep_start)

        wall_time = time() - episode_start
        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(self.n_epoch, self.n_episode, total_reward,
                         self.ale.getEpisodeFrameNumber(),
                         '{:.2f}'.format(wall_time))

    def _stop_condition_met(self):
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it 
        will overwrite what is in the old object"""
        return self._cached('raw', self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. 
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object.
        """
        return self._cached('gray', self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours. 
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached('rgb', self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object"""
        return self._cached('ram', self.ale.getRAM)

    def _cached(self, key, func):
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()

        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, 'settings')
        with open(path, 'w') as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to 
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
Esempio n. 30
0
class ALEEnvironment(Environment):
  def __init__(self, rom_file, args):
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height

    self.life_lost = False
    self.mode = 'train'  # restart() and isTerminal() below read self.mode; give it a default

  def numActions(self):
    return len(self.actions)

  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
        self.mode == 'test' or
        not self.life_lost or  # `reset` called in a middle of episode
        self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False

  def act(self, action):
    lives = self.ale.lives()
    reward = self.ale.act(self.actions[action])
    self.life_lost = (lives != self.ale.lives())
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, (self.screen_width, self.screen_height))
    return resized

  def isTerminal(self):
    if self.mode == 'train':
      return self.ale.game_over() or self.life_lost
    return self.ale.game_over()
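Because train mode also treats a life loss as terminal, an episode loop over this wrapper is short. A hedged sketch (construction of env from rom_file and args is assumed):
import random

def run_train_episode(env):
    env.restart()
    total_reward = 0
    while not env.isTerminal():  # in train mode this also ends on a life loss
        action = random.randrange(env.numActions())
        total_reward += env.act(action)
        observation = env.getScreen()  # resized grayscale frame
    return total_reward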
Esempio n. 31
0
class AleEnv(object):
    '''ALE wrapper for RL training
    game_over_conditions={'points':(-1, 1)}: dict that describes all desired game over conditions
    each key corresponds to a condition that is checked; the first condition met produces a game over
        points: int or tuple of integers
            int:
                if x < 0, game ends when score is <= x
                if x >= 0, game ends when score is >= x
            tuple:
                game ends if score <= x[0] or score >= x[1]
        lives: int that ends game when lives <= x
        frames: int that ends game when total number of frames >= x
        episodes: int that ends game when num of episodes >= x
            Use max_num_frames_per_episode to set max episode length


    '''
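    # A hedged construction example for the conditions described above
    # (values are illustrative, not defaults):
    #   env = AleEnv('breakout.bin',
    #                game_over_conditions={'points': (-5, 5), 'lives': 1},
    #                frame_skip=4, screen_color='gray', reduce_screen=True)
    #   reward, d_frame, done = env.act(0)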

    # will include timing and hidden functionality in future iterations

    def __init__(self,
                 rom_file,
                 display_screen=False,
                 sound=False,
                 random_seed=0,
                 game_over_conditions={},
                 frame_skip=1,
                 repeat_action_probability=0.25,
                 max_num_frames_per_episode=0,
                 min_action_set=False,
                 screen_color='gray',
                 fps=60,
                 output_buffer_size=1,
                 reduce_screen=False):

        # ALE instance and setup
        self.ale = ALEInterface()
        #TODO: check if rom file exists; will crash jupyter kernel otherwise
        # First load lets us query the action sets below; the ROM is reloaded
        # after all settings have been applied, since options set after a load
        # only take effect on the next loadROM.
        self.ale.loadROM(str.encode(rom_file))

        self.ale.setBool(b'sound', sound)
        self.ale.setBool(b'display_screen', display_screen)

        if min_action_set:
            self.legal_actions = self.ale.getMinimalActionSet()
        else:
            self.legal_actions = self.ale.getLegalActionSet()

        self.ale.setInt(b'random_seed', random_seed)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.frame_skip = frame_skip
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setInt(b'max_num_frames_per_episode',
                        max_num_frames_per_episode)

        self.ale.loadROM(str.encode(rom_file))

        self.game_over_conditions = game_over_conditions
        self.screen_color = screen_color
        self.reduce_screen = reduce_screen
        self.d_frame = (fps**-1) * self.frame_skip

        # set up output buffer
        self.output_buffer_size = output_buffer_size
        self.queue_size = self.output_buffer_size
        self._reset_params()

    def observe(self, flatten=False, expand_dim=False):

        if flatten:
            # np.stack needs a sequence, not a bare generator
            out = np.stack([self.output_queue[i]
                            for i in range(self.output_buffer_size)]).flatten()

            if expand_dim:
                return np.expand_dims(np.expand_dims(out, axis=0), axis=1)
            else:
                return out
        else:
            out = np.stack([self.output_queue[i]
                            for i in range(self.output_buffer_size)])
            out = np.squeeze(out)

            if expand_dim:
                return np.expand_dims(np.expand_dims(out, axis=0), axis=1)
            else:
                return out

    @property
    def width(self):
        return self.game_screen.shape[1]

    @property
    def height(self):
        return self.game_screen.shape[0]

    @property
    def game_over(self):
        return self._game_over()

    @property
    def actions(self):
        return self.legal_actions

    @property
    def lives(self):
        return self.ale.lives()

    def _reset_params(self):

        self.total_points = 0
        self.total_frames = 0
        self.curr_episode = 1
        self.prev_ep_frame_num = -float("inf")

        if self.screen_color in ('gray', 'grey'):
            self.game_screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84))
                # Crop the 110x84 resize down to 84x84
                # (trim 21 rows from the top, 5 from the bottom).
                self.game_screen = self.game_screen[21:-5, :]
        elif self.screen_color in ('rgb', 'color'):
            self.game_screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84, 3))
                self.game_screen = self.game_screen[21:-5, :, :]

        # Pre-fill the buffer with black frames (note: shaped for grayscale
        # output; RGB screens carry an extra channel dimension).
        self.output_queue = deque(
            np.zeros(shape=(self.queue_size - 1, self.height, self.width)),
            self.queue_size)
        self.output_queue.appendleft(self.game_screen)

    def reset(self):
        self.ale.reset_game()
        self._reset_params()

    def act(self, action):
        reward = self.ale.act(self.legal_actions[action])

        if self.screen_color in ('gray', 'grey'):
            self.game_screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84))
                # Same 110x84 -> 84x84 crop as in _reset_params.
                self.game_screen = self.game_screen[21:-5, :]
        elif self.screen_color in ('rgb', 'color'):
            self.game_screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84, 3))
                self.game_screen = self.game_screen[21:-5, :, :]

        self.output_queue.pop()
        self.output_queue.appendleft(self.game_screen)

        self.total_points += reward
        self.total_frames += self.frame_skip
        # The episode frame counter restarts when a new episode begins, so a
        # non-increasing value marks an episode boundary.
        if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num:
            self.curr_episode += 1
        self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber()

        return reward, self.d_frame, self.game_over

    def _game_over(self):
        if self.ale.game_over():
            return True

        for cond, target in self.game_over_conditions.items():
            if cond == 'points':
                if isinstance(target, int):
                    # An int is a lower bound if negative, an upper bound
                    # otherwise (see the class docstring).
                    if target < 0 and self.total_points <= target:
                        return True
                    if target >= 0 and self.total_points >= target:
                        return True
                elif isinstance(target, tuple):
                    if (self.total_points <= target[0]
                            or self.total_points >= target[1]):
                        return True
            elif cond == 'lives':
                if self.lives <= target:
                    return True
            elif cond == 'frames':
                if self.total_frames >= target:
                    return True
            elif cond == 'episodes':
                if self.curr_episode >= target:
                    return True
            else:
                raise RuntimeError("Invalid game over condition: %s" % cond)

        return False
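
A minimal driver loop for the wrapper above; the ROM path and the random policy are placeholders for illustration, not part of the original class:

import numpy as np

env = AleEnv('breakout.bin',  # hypothetical ROM path
             game_over_conditions={'points': (-1, 1), 'lives': 2},
             frame_skip=4, output_buffer_size=4, reduce_screen=True)
while not env.game_over:
    action = np.random.randint(len(env.actions))
    reward, d_frame, done = env.act(action)
obs = env.observe(flatten=True)  # last 4 frames as one flat vector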
Example n. 32
class ALEEnvironment():
    def __init__(self, config):
        self.history = History3D(config)
        self.history_length = config.history_length
        self.mode = config.mode
        self.life_lost = False
        self.terminal = False
        self.score = 0

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if config.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', False)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip',
                        config.frame_skip)  # repeat each action for this many frames
        self.ale.setBool('color_averaging', config.color_averaging)

        if config.random_seed:  # Random seed for repeatable experiments.
            self.ale.setInt('random_seed', config.random_seed)

        if config.record_screen_path:
            if not os.path.exists(config.record_screen_path):
                os.makedirs(config.record_screen_path)
            self.ale.setString('record_screen_dir', config.record_screen_path)

        if config.record_sound_filename:
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               config.record_sound_filename)

        self.ale.loadROM(config.rom_file)

        if config.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.screen_width = config.screen_width
        self.screen_height = config.screen_height

    def numActions(self):
        return len(self.actions)

    def new_game(self):
        state, terminal = self.reset()
        for _ in range(self.history_length + 1):
            self.history.add(state)
        return state, terminal, list(range(len(self.actions)))

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in a terminal state due to a life loss but not yet game over, only
        # the life loss flag is reset so that the next game starts from the
        # current state. Otherwise, the game is simply initialized.
        if (
            self.mode == 'test' or
            not self.life_lost or  # `reset` called in the middle of an episode
            self.ale.game_over()  # all lives are lost
        ):
            self.ale.reset_game()
        self.life_lost = False
        return self.getScreen(), self.isTerminal()

    def step(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (lives != self.ale.lives())
        self.score += reward
        self.current_state = self.getScreen()
        self.history.add(self.current_state)
        self.terminal = self.isTerminal()
        return reward, self.history.get(), self.terminal

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen / 255.,
                             (self.screen_width, self.screen_height))

        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
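
ALEEnvironment reads everything from a `config` object; a hypothetical minimal config is sketched below (the attribute names mirror what the constructor accesses, and `History3D` must be importable from the surrounding project):

from types import SimpleNamespace

config = SimpleNamespace(
    mode='train', history_length=4, display_screen=False,
    frame_skip=4, color_averaging=True, random_seed=123,
    record_screen_path=None, record_sound_filename=None,
    rom_file=b'breakout.bin', minimal_action_set=True,
    screen_width=84, screen_height=84)
env = ALEEnvironment(config)
state, terminal, actions = env.new_game()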
Example n. 33
# doc/examples/sharedLibraryInterfaceExample.cpp

import sys
from ale_python_interface import ALEInterface
import numpy as np

if len(sys.argv) < 2:
    print("Usage: ./ale_python_test1.py <ROM_FILE_NAME>")
    sys.exit()

ale = ALEInterface()

max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
ale.setInt("random_seed", 123)

random_seed = ale.getInt("random_seed")
print("random_seed: " + str(random_seed))

ale.loadROM(sys.argv[1])
legal_actions = ale.getLegalActionSet()

for episode in range(10):
    total_reward = 0.0
    while not ale.game_over():
        a = legal_actions[np.random.randint(legal_actions.size)]
        reward = ale.act(a)
        total_reward += reward
    print("Episode " + str(episode) + " ended with score: " +
          str(total_reward))
    ale.reset_game()
Example n. 34
class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake

    git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git
    cd Arcade-Learning-Environment

    mkdir build && cd build
    cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON ..
    make -j 4
    cd ..

    pip3 install .
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        life_loss_terminal (bool): Signals a terminal state on loss of life
            (<span style="color:#00C000"><b>default</b></span>: false).
        life_loss_punishment (float): Penalty subtracted from the reward on loss of life (positive values are a penalty)
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(
        self, level, life_loss_terminal=False, life_loss_punishment=0.0,
        repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None
    ):
        from ale_python_interface import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(rom_file=self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left, Up Fire,
        # Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire, Down Right Fire, Down Left
        # Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        return dict(type='float', shape=(height, width, 3))

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        screen = np.copy(self.environment.getScreenRGB(screen_data=self.screen))
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        self.screen = np.empty((height, width, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(action=self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()

        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
            self.lives = next_lives

        return states, terminal, reward
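
A hypothetical interaction loop against the adapter above (the ROM path and random policy are placeholders):

import numpy as np

env = ArcadeLearningEnvironment(level='breakout.bin', frame_skip=4, seed=42)
states = env.reset()
terminal = False
while not terminal:
    action = np.random.randint(len(env.available_actions))
    states, terminal, reward = env.execute(actions=action)
env.close()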
Example n. 35
def main():

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.setInt(b'frame_skip', 4)
    # ale.setFloat(b'repeat_action_probability', .7)
    # ale.setBool(b'color_averaging', True)

    game = 'breakout'  # ACKTR tasks: 'space_invaders', 'seaquest', 'qbert', 'pong', 'beam_rider', 'breakout'
    rom = home + '/Documents/ALE/roms/supported/' + game + '.bin'
    ale.loadROM(str.encode(rom))

    legal_actions = ale.getLegalActionSet()
    rewards, num_episodes = [], 5

    config = []
    agent = DQN_agent(config)

    for episode in range(num_episodes):
        total_reward = 0

        exp_state = []
        exp_action = 0
        exp_reward = 0
        exp_next_state = []
        while not ale.game_over():

            #Save frame
            frame = ale.getScreenGrayscale()
            frame = cv2.resize(frame, (84, 84))
            exp_next_state.append(frame)
            #Make action
            action = random.choice(legal_actions)
            reward = ale.act(action)
            total_reward += reward
            exp_reward += reward
            #Make experience
            if len(exp_next_state) == 4:
                state_ready = np.reshape(np.stack(exp_next_state),
                                         [4 * 84, 84])
                exp_action = action
                if len(exp_state) == 0:
                    exp_state = exp_next_state
                else:
                    experience = [
                        exp_state, exp_action, exp_reward, exp_next_state
                    ]
                    exp_reward = 0
                    exp_state = exp_next_state
                # Start the next 4-frame stack in a fresh list so exp_state is
                # not aliased and the length check above can trigger again.
                exp_next_state = []

        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
Example n. 36
class AtariEnvironment:
    num_actions = 18  # Use full action set

    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        self.ale = ALEInterface()
        self.ale.setBool(b"display_screen", cfg.display_screen)
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))

        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        assert len(self.action_set) == AtariEnvironment.num_actions

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(
            cfg.frame_buffer_size, screen_dims, np.uint8
        )
        self._frame_stack = CircularBuffer(
            cfg.frame_history_size, frame_shape, np.uint8
        )
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, 3)
        frame = self._frame_postprocess(expanded_frame)

        return frame

    def reset(self, inc_episode_count=True):
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1

        self.ale.reset_game()
        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._frame_stack.append(self._get_single_frame())
        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        return self._episode_reward

    @property
    def episode_frames(self):
        return self._episode_frames

    @property
    def episode_steps(self):
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
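
`_get_single_frame` above implements the standard DQN flicker-removal step: some Atari games draw sprites only on alternating frames, so taking the element-wise max over the last few raw screens recovers them. A standalone sketch of the idea (illustrative, not part of the class):

import numpy as np

# Two consecutive raw grayscale screens (ALE returns 210x160x1 uint8 arrays);
# a sprite drawn on only one of them survives the element-wise max.
frame_a = np.zeros((210, 160, 1), dtype=np.uint8)
frame_b = np.zeros((210, 160, 1), dtype=np.uint8)
frame_a[100, 80, 0] = 255  # sprite visible on the even frame only
pooled = np.maximum(frame_a, frame_b)
assert pooled[100, 80, 0] == 255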
Example n. 37
class AtariEnvironment:
    def __init__(self, seed=1, record=False):
        self.ale = ALEInterface()
        self.ale.setBool(b'display_screen', FLAGS.display_screen or record)
        self.ale.setInt(b'frame_skip', 1)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'random_seed', seed)
        self.ale.setFloat(b'repeat_action_probability', FLAGS.sticky_prob)
        self.ale.setInt(b'max_num_frames_per_episode', FLAGS.max_num_frames_per_episode)

        if record:
            if not tf.gfile.Exists(FLAGS.record_dir):
                tf.gfile.MakeDirs(FLAGS.record_dir)
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_screen_dir', str.encode(FLAGS.record_dir))
            self.ale.setString(b'record_sound_filename', str.encode(FLAGS.record_dir + '/sound.wav'))
            self.ale.setInt(b'fragsize', 64)

        self.ale.loadROM(str.encode(FLAGS.rom))

        self.ale.setMode(FLAGS.mode)
        self.ale.setDifficulty(FLAGS.difficulty)

        self.action_set = self.ale.getLegalActionSet()

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(FLAGS.frame_buffer_size, screen_dims, np.uint8)

        self.reset()

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, 3)

        return expanded_frame

    def reset(self):
        self._episode_frames = 0
        self._episode_reward = 0

        self.ale.reset_game()
        for _ in range(FLAGS.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(FLAGS.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._episode_frames += FLAGS.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self._get_single_frame(), self._is_terminal()

    def state(self):
        assert len(self._frame_buffer) == FLAGS.frame_buffer_size
        return self._get_single_frame()

    def num_actions(self):
        return len(self.action_set)

    def episode_reward(self):
        return self._episode_reward

    def episode_frames(self):
        return self._episode_frames

    def frame_skip(self):
        return FLAGS.frame_skip
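
Both AtariEnvironment variants above rely on an external CircularBuffer that is not shown in this collection. A minimal stand-in covering only the operations these classes use (fixed length, pre-filled, append, len, iteration, indexing) might look like the sketch below; this is an assumption about the helper's interface, not its actual source:

from collections import deque
import numpy as np

class CircularBuffer:
    """Fixed-length FIFO of equally-shaped arrays (illustrative stand-in)."""

    def __init__(self, maxlen, shape, dtype):
        self._buf = deque(maxlen=maxlen)
        for _ in range(maxlen):
            self._buf.append(np.zeros(shape, dtype=dtype))

    def append(self, item):
        self._buf.append(item)  # the oldest entry falls off the far end

    def __len__(self):
        return len(self._buf)

    def __iter__(self):
        return iter(self._buf)

    def __getitem__(self, index):
        return self._buf[index]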
Example n. 38
class AleAgent:
    ##
    # @param processing_cls Class for processing the game's visual input
    def __init__(self, processing_cls, game_rom=None, encoder_model=None, encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model, path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)   # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)   # no sound

            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )

    ##
    # Initialize the reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in range(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print('Starting episode:', episode + 1)

            if key_binding:
                eps = 0.05
            else:
                eps -= 2/num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)
            while not self.game.game_over():
                current_state = next_state
                x = None

                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]
                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)
                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print('Epsilon:', eps)
            print('Episode', episode + 1, 'ended with score:', total_reward)
            print('Hits:', hits)
            self.game.reset_game()
            self.NFQ.train()
            hits = 0
            moves = 0
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    def play(self):
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)

            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1

        print('The game ended with score:', total_reward, 'after:', moves, 'moves')
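
The action selection inside `train` is plain epsilon-greedy with a decaying epsilon. Stripped of the game specifics it reduces to the helper below (illustrative; `q_values` stands in for the NFQ network's output):

import numpy as np

def epsilon_greedy(q_values, eps):
    # With probability eps explore uniformly, otherwise exploit the argmax.
    if np.random.rand() < eps:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))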
Example n. 39
class KungFuMaster(object):
    def __init__(
            self,
            rom='/home/josema/AI/ALE/Arcade-Learning-Environment/Roms/kung_fu_master.bin',
            trainsessionname='test'):

        self.agent = None
        self.isAuto = True
        self.gui_visible = False
        self.userquit = False
        self.optimalPolicyUser = False  # optimal policy set by user
        self.trainsessionname = trainsessionname
        self.elapsedtime = 0  # elapsed time for this experiment

        self.keys = 0

        # Configuration
        self.pause = False  # game is paused
        self.debug = False

        self.sleeptime = 0.0
        self.command = 0
        self.iteration = 0
        self.cumreward = 0
        self.cumreward100 = 0  # cum reward for statistics
        self.cumscore100 = 0
        self.ngoalreached = 0
        self.max_level = 1

        self.hiscore = 0
        self.hireward = -1000000
        self.resfile = open("data/" + self.trainsessionname + ".dat", "a+")

        self.legal_actions = 0
        self.rom = rom
        self.key_status = []

    def init(self, agent):  # init after creation (uses args set from cli)
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size), dtype=np.uint8)

        if (self.gui_visible):
            os.environ['SDL_VIDEO_CENTERED'] = '1'
            if sys.platform == 'darwin':
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                pygame.init()

                self.ale.setBool('sound', True)
                self.ale.setBool('display_screen', False)

        self.ale.loadROM(self.rom)
        self.legal_actions = self.ale.getLegalActionSet()

        if (self.gui_visible):
            (self.screen_width, self.screen_height) = self.ale.getScreenDims()
            print("width/height: " + str(self.screen_width) + "/" +
                  str(self.screen_height))

            (display_width, display_height) = (1024, 420)
            self.screen = pygame.display.set_mode(
                (display_width, display_height))

            pygame.display.set_caption(
                "Reinforcement Learning - Sapienza - Jose M Salas")
            self.numpy_surface = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)

            self.game_surface = pygame.Surface(
                (self.screen_width, self.screen_height))

            pygame.display.flip()
            #init clock
            self.clock = pygame.time.Clock()

        self.agent = agent
        self.nactions = len(
            self.legal_actions
        )  # 0: not moving, 1: left, 2: right, 3: up, 4: down
        for i in range(self.nactions):
            self.key_status.append(False)

        print(self.nactions)
        #        ns = 89999 # Number of statuses if we use enemy type ram info without level number
        #FINAL        ns = 489999 # Number of statuses if we use enemy type ram info
        ns = 4899999  # Number of statuses if we use enemy type ram info

        #        ns = 48999
        print('Number of states: %d' % ns)
        self.agent.init(ns, self.nactions)  # 1 for RA not used here

    def initScreen(self):

        if (self.gui_visible):
            if sys.platform == 'darwin':
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                pygame.init()

                self.ale.setBool('sound', True)
                self.ale.setBool('display_screen', False)
        if (self.gui_visible):
            (self.screen_width, self.screen_height) = self.ale.getScreenDims()
            print("width/height: " + str(self.screen_width) + "/" +
                  str(self.screen_height))

            (display_width, display_height) = (1024, 420)
            self.screen = pygame.display.set_mode(
                (display_width, display_height))

            pygame.display.set_caption(
                "Reinforcement Learning - Sapienza - Jose M Salas")
            self.numpy_surface = np.zeros(
                (self.screen_height, self.screen_width, 3), dtype=np.uint8)

            self.game_surface = pygame.Surface(
                (self.screen_width, self.screen_height))

            pygame.display.flip()
            #init clock
            self.clock = pygame.time.Clock()

    def reset(self):
        self.pos_x = 0
        self.pos_y = 0
        # Kung fu master observations
        self.enemy_pos = 0
        self.n_enemies = 0
        self.my_pos = 0
        self.danger_pos = 0
        self.danger_type = 0
        self.enemy_type = 0  # 0, 1, 2, 3, 80, 81, 82, 40
        self.blocked = 0
        self.prev_blocked = 0
        self.hold_hit = 0
        self.time_left1 = 0
        self.time_left2 = 0
        self.my_energy = 39
        self.previous_my_energy = 39
        self.lifes = 3
        self.previous_lifes = 3
        self.got_hit = 0
        self.got_blocked = 0
        self.got_unblocked = 0
        self.still_blocked = False
        self.starting_pos = 0
        self.level = 1

        self.score = 0
        self.cumreward = 0
        self.cumscore = 0
        self.action_reward = 0

        self.current_reward = 0  # accumulates the reward over all events that happen during this action, until the next different state

        self.prev_state = None  # previous state
        self.firstAction = True  # first action of the episode
        self.finished = False  # episode finished
        self.newstate = True  # new state reached
        self.numactions = 0  # number of actions in this episode
        self.iteration += 1

        self.agent.optimal = self.optimalPolicyUser or (
            self.iteration % 100
        ) == 0  # False #(random.random() < 0.5)  # choose greedy action selection for the entire episode

    def pair_function(self):
        # Combine the number of enemies, player blocked and danger type information into 7 different states
        if self.n_enemies > 0:
            self.danger_type = 0

        # print(str(self.n_enemies) + ' - ' + str(self.danger_type) + ' - ' + str(self.blocked))
        pair = int(
            (0.5 * (self.n_enemies + self.danger_type) *
             (self.n_enemies + self.danger_type + 1) + self.danger_type + 1) *
            (1 - (self.blocked // 128)))  # // keeps the Python 2 integer-division behaviour
        if pair > 8:
            return 5  # game not started yet
        else:
            return pair
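
    # The formula in pair_function is a variant of the Cantor pairing
    # function pi(k1, k2) = (k1 + k2) * (k1 + k2 + 1) // 2 + k2, a bijection
    # from pairs of naturals to a single natural; here it is applied to
    # (n_enemies, danger_type), shifted by one, and zeroed out while the
    # player is blocked. (Explanatory note added for this write-up, not part
    # of the original class.)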

    def enemy_type_s(self):
        if self.enemy_type > 127:
            return (self.enemy_type - 128 + 4)
        elif self.enemy_type == 64:
            return 8
        else:
            return self.enemy_type

    def getstate(self):

        #        print ('enemy type: ' + str(self.enemy_type_s()) + 'level: ' + str(self.level -1) )
        x = int((self.level - 1) * 1000000 + self.pair_function() * 100000 +
                (self.enemy_type_s() * 10000) +
                np.rint(self.my_pos / 32) * 1000 +
                np.rint(self.enemy_pos / 32) * 100 +
                np.rint(self.danger_pos / 32) * 10 +
                np.rint(self.hold_hit / 16))
        #3FINAL        x = (int)((self.enemy_type_s()*1000) + (self.level-1)*100000 + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16))

        #2NO LEVEL        x = (int)((self.enemy_type_s()*1000) + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16))
        #1NO ENEMY TYPE        x = (int)((self.level-1)*10000 + self.pair_function()*1000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16))

        return x
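
    # getstate packs each feature into its own decimal digit, so features can
    # be read back with integer division and modulo, assuming each field
    # stays inside its slot (enemy_type_s values above 9 would overflow).
    # Illustrative decoding (an addition for this write-up, not original):
    #     level      = x // 1000000 + 1
    #     pair       = (x // 100000) % 10
    #     enemy_type = (x // 10000) % 10
    #     my_pos_bin = (x // 1000) % 10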

    def goal_reached(self):

        #return (self.my_energy>0 and self.time_left1==0 and self.time_left2<5) #and self.my_energy==39)
        return (self.level == 5)

    def update(self, a):

        self.command = a
        # Update RAM
        self.ale.getRAM(self.ram)

        # Get info from RAM
        self.enemy_pos = self.ram[72]
        self.n_enemies = self.ram[91]
        self.danger_pos = self.ram[73]
        self.my_pos = self.ram[74]
        self.hold_hit = self.ram[77]

        self.enemy_type = self.ram[54]

        if self.level < self.ram[31]:
            self.starting_pos = self.ram[74]
        self.level = self.ram[31]
        self.max_level = max(self.level, self.max_level)

        # Danger/Enemy position:
        # 49 = no danger
        # 50 = danger approaching from left
        # 208 = danger approaching from right

        # ram[96] = 6, danger comes from top
        # ram[96] = 29, danger comes from bottom
        # ram[96] = 188, none
        if self.ram[96] == 6:
            self.danger_type = 0
        elif self.ram[96] == 29:
            self.danger_type = 1
        else:
            self.danger_type = 2

        self.time_left1 = self.ram[27]
        self.time_left2 = self.ram[28]

        self.previous_my_energy = self.my_energy
        self.my_energy = self.ram[75]

        if (self.my_energy < self.previous_my_energy
                and not self.still_blocked and self.ram[34] == 0):
            self.got_hit = STATES['GotHit']
        else:
            self.got_hit = 0

        self.previous_lifes = self.lifes
        self.lifes = self.ram[29]
        self.prev_blocked = self.blocked
        self.blocked = self.ram[61]
        if self.blocked > self.prev_blocked and not self.still_blocked:
            self.got_blocked = STATES['GotBlocked']
            self.still_blocked = True
            self.got_unblocked = 0
        elif self.blocked < self.prev_blocked and self.still_blocked:
            self.got_unblocked = STATES['GotUnblocked']
            self.still_blocked = False
            self.got_blocked = 0
        else:
            self.got_blocked = 0
            self.got_unblocked = 0

#        print ('enemy_pos=' +str(self.enemy_pos) + ' - danger_pos=' + str(self.danger_pos) + ' - my_position='
#               + str(self.my_pos) + ' - my_energy=' + str(self.my_energy) + ' - blocked=' + str(self.blocked) + ' - danger_type=' + str(self.danger_type))

        self.prev_state = self.getstate()  # remember previous state

        # print " == Update start ",self.prev_state," action",self.command

        self.current_reward = 0  # accumulates the reward over all events that happen during this action, until the next different state
        #print('self.current_reward = 0')
        self.numactions += 1  # total number of actions executed in this episode

        # while (self.prev_state == self.getstate()):

        if self.firstAction:
            self.starting_pos = self.ram[74]
            self.firstAction = False
        self.current_reward = self.ale.act(a)

        if self.ram[34] == 0:  #only when playing
            if (a == 3 and self.starting_pos < self.my_pos) or (
                    a == 4 and self.starting_pos > self.my_pos):
                self.action_reward = STATES['MoveFW']
            elif (a == 3 and self.starting_pos > self.my_pos) or (
                    a == 4 and self.starting_pos < self.my_pos):
                self.action_reward = STATES['MoveBW']
            else:
                self.action_reward = STATES['NotMoving']

        self.score += self.current_reward
        self.current_reward += self.action_reward

        #        print('score= ' + str(self.score) + ' current reward=' +str(np.rint(self.current_reward))+ ' - energy=' + str(self.my_energy/39.0) +
        #        ' - got_hot='+ str(self.got_hit) + ' - got_blocked='  + str(self.got_blocked) + ' - got_unblocked=' + str(self.got_unblocked))
        # check if episode terminated

        #self.draw_screen

        if self.goal_reached():
            self.current_reward += STATES['Alive']
            self.ngoalreached += 1
            #self.ale.reset_game()
            self.finished = True

        if (self.ale.game_over()):
            self.current_reward += STATES['Dead']
            if self.level > 1:
                print('game over in level ' + str(self.level))
            if self.my_energy > 0 and self.lifes == 3:
                print('Game over alive????')
            self.ale.reset_game()

            self.finished = True
        if self.level > 2:
            if self.gui_visible == False:
                self.gui_visible = True
                self.initScreen()
        #print " ** Update end ",self.getstate(), " prev ",self.prev_state

    def input(self):
        self.isPressed = False
        if self.gui_visible:

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    return False

                if event.type == pygame.KEYDOWN:

                    if event.key == pygame.K_SPACE:
                        self.pause = not self.pause
                        print "Game paused: ", self.pause
                    elif event.key == pygame.K_a:
                        self.isAuto = not self.isAuto
                        self.sleeptime = int(self.isAuto) * 0.07
                    elif event.key == pygame.K_s:
                        self.sleeptime = 1.0
                        self.agent.debug = False
                    elif event.key == pygame.K_d:
                        self.sleeptime = 0.07
                        self.agent.debug = False
                    elif event.key == pygame.K_f:
                        self.sleeptime = 0.005
                        self.agent.debug = False
                    elif event.key == pygame.K_g:
                        self.sleeptime = 0.0
                        self.agent.debug = False
                    elif event.key == pygame.K_o:
                        self.optimalPolicyUser = not self.optimalPolicyUser
                        print "Best policy: ", self.optimalPolicyUser
                    elif event.key == pygame.K_q:
                        self.userquit = True
                        print "User quit !!!"
                    else:

                        pressed = pygame.key.get_pressed()

                        self.keys = 0
                        self.keys |= pressed[pygame.K_UP]
                        self.keys |= pressed[pygame.K_DOWN] << 1
                        self.keys |= pressed[pygame.K_LEFT] << 2
                        self.keys |= pressed[pygame.K_RIGHT] << 3
                        self.keys |= pressed[pygame.K_z] << 4
                        self.command = key_action_tform_table[self.keys]
                        self.key_status[self.command] = True

                if event.type == pygame.KEYUP:
                    pressed = pygame.key.get_pressed()

                    self.keys = 0
                    self.keys |= pressed[pygame.K_UP]
                    self.keys |= pressed[pygame.K_DOWN] << 1
                    self.keys |= pressed[pygame.K_LEFT] << 2
                    self.keys |= pressed[pygame.K_RIGHT] << 3
                    self.keys |= pressed[pygame.K_z] << 4
                    self.command = key_action_tform_table[self.keys]
                    self.key_status[self.command] = False
                    if not (True in self.key_status):
                        self.command = 0

        return True

    def getUserAction(self):
        return self.command

    def getreward(self):

        # blocked // 128 keeps the original integer-division semantics
        # (a penalty of 1 only while blocked >= 128).
        r = (np.rint(self.current_reward) + self.got_hit + self.got_blocked
             + self.got_unblocked - self.blocked // 128)
        self.cumreward += r

        return r

    def print_report(self, printall=False):
        toprint = printall
        ch = ' '
        if (self.agent.optimal):
            ch = '*'
            toprint = True

        s = 'Iter %6d, sc: %3d, l: %d,  na: %4d, r: %5d %c' % (
            self.iteration, self.score, self.level, self.numactions,
            self.cumreward, ch)

        if self.score > self.hiscore:
            self.hiscore = self.score
            s += ' HISCORE '
            toprint = True
        if self.cumreward > self.hireward:
            self.hireward = self.cumreward
            s += ' HIREWARD '
            toprint = True

        if (toprint):
            print(s)

        self.cumreward100 += self.cumreward
        self.cumscore100 += self.score
        numiter = 100
        if (self.iteration % numiter == 0):
            #self.doSave()
            pgoal = float(self.ngoalreached * 100) / numiter
            print(
                '----------------------------------------------------------------------------------------------------------------------'
            )
            print(
                "%s %6d avg last 100: reward %d | score %.2f | level %d | p goals %.1f %%"
                % (self.trainsessionname, self.iteration, self.cumreward100 /
                   100, float(self.cumscore100) / 100, self.max_level, pgoal))
            print(
                '----------------------------------------------------------------------------------------------------------------------'
            )
            self.cumreward100 = 0
            self.cumscore100 = 0
            self.ngoalreached = 0

        sys.stdout.flush()

        self.resfile.write(
            "%d,%d,%d,%d\n" %
            (self.score, self.cumreward, self.goal_reached(), self.numactions))
        self.resfile.flush()

    def draw(self):
        if self.gui_visible:

            self.screen.fill((0, 0, 0))

            self.ale.getScreenRGB(self.numpy_surface)

            pygame.surfarray.blit_array(
                self.game_surface, np.transpose(self.numpy_surface, (1, 0, 2)))
            #        pygame.pixelcopy.array_to_surface(self.game_surface, np.transpose(self.numpy_surface,(1,0,2)))
            self.screen.blit(
                pygame.transform.scale2x(
                    pygame.transform.scale(
                        self.game_surface,
                        (self.screen_height, self.screen_height))), (0, 0))

            #Display ram bytes
            font = pygame.font.SysFont("Ubuntu Mono", 32)
            text = font.render("RAM: ", 1, (255, 208, 208))
            self.screen.blit(text, (430, 10))

            font = pygame.font.SysFont("Ubuntu Mono", 25)
            height = font.get_height() * 1.2

            line_pos = 40
            ram_pos = 0
            while (ram_pos < 128):
                ram_string = ''.join([
                    "%02X " % self.ram[x]
                    for x in range(ram_pos, min(ram_pos + 16, 128))
                ])
                text = font.render(ram_string, 1, (255, 255, 255))
                self.screen.blit(text, (440, line_pos))
                line_pos += height
                ram_pos += 16

            #display current action
            font = pygame.font.SysFont("Ubuntu Mono", 32)
            text = font.render("Current Action: " + str(self.command), 1,
                               (208, 208, 255))
            height = font.get_height() * 1.2
            self.screen.blit(text, (430, line_pos))
            line_pos += height

            #display reward
            font = pygame.font.SysFont("Ubuntu Mono", 30)
            text = font.render("Total Reward: " + str(self.cumreward), 1,
                               (208, 255, 255))
            self.screen.blit(text, (430, line_pos))

            pygame.display.flip()
#            clock.tick(60.)
        else:
            return 0

    def quit(self):
        self.resfile.close()
        pygame.quit()