Esempio n. 1
0
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    rec_screen = ""
    if "--nn-file" in args:
        temp_params = vars(load_params(args[args.index("--nn-file") + 1]))
        for p in temp_params:
            try:
                vars(defaults)[p.upper()] = temp_params[p]
            except:
                print "warning: parameter", p, "from param file doesn't exist."
        #rec_screen = args[args.index("--nn-file")+1][:-len("last_model.pkl")]+"/frames"

    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    parameters.rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    rng = np.random.RandomState(123456)

    folder_name = None if parameters.folder_name == "" else parameters.folder_name

    ale = ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))
    ale.setBool('display_screen', parameters.display_screen)
    ale.setString('record_screen_dir', rec_screen)
    trainer = Q_Learning(model_params=parameters,
                         ale_env=ale,
                         folder_name=folder_name)
    trainer.train()
Esempio n. 2
0
    def __init__(self,
                 game,
                 seed=None,
                 use_sdl=False,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        # atari_py is used only to provide rom files. atari_py has its own
        # ale_python_interface, but it is obsolete.
        game_path = atari_py.get_game_path(game)

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2**16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir',
                          str.encode(str(record_screen_dir)))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(str(game_path)))

        assert ale.getFrameNumber() == 0

        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()

        self.action_space = spaces.Discrete(len(self.legal_actions))
        one_screen_observation_space = spaces.Box(low=0,
                                                  high=255,
                                                  shape=(84, 84))
        self.observation_space = spaces.Tuple([one_screen_observation_space] *
                                              n_last_screens)
def initializeALE(romFile, rec_dir):
    ale = ALEInterface()

    max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)
    # Set record flags
    ale.setString(b'record_screen_dir', rec_dir + '/')
    ale.setString("record_sound_filename", rec_dir + "/sound.wav")
    # We set fragsize to 64 to ensure proper sound sync
    ale.setInt("fragsize", 64)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.

    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()

    return ale, actionSet
Esempio n. 4
0
class Environment:
  def __init__(self, rom_file, args):
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.dims = (args.screen_height, args.screen_width)

  def numActions(self):
    return len(self.actions)

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def isTerminal(self):
    return self.ale.game_over()
Esempio n. 5
0
class Emulator:
    def __init__(self):

        self.ale = ALEInterface()

        # turn off the sound
        self.ale.setBool('sound', False)

        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability',
                          REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)

        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
Esempio n. 6
0
class Emulator:
    def __init__(self):
    
        self.ale = ALEInterface()
        
        # turn off the sound
        self.ale.setBool('sound', False)
        
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)


        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
Esempio n. 7
0
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM('{game}.bin'.format(game=game))
    ale.setFloat("repeat_action_probability", 0)

    return ale
Esempio n. 8
0
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM('{game}.bin'.format(game=game))
    ale.setFloat("repeat_action_probability", 0)

    return ale
Esempio n. 9
0
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)

    return ale
Esempio n. 10
0
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM(rom_path + '/space_invaders.bin')
    ale.setFloat("repeat_action_probability", 0)

    return ale
Esempio n. 11
0
    def __init__(self,
                 rom_path,
                 n_last_screens=4,
                 frame_skip=4,
                 treat_life_lost_as_terminal=True,
                 crop_or_scale='scale',
                 max_start_nullops=30,
                 record_screen_dir=None,
                 render=False,
                 max_episode_length=None,
                 max_time=None):
        self.frame_skip = frame_skip
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops
        self.max_episode_length = max_episode_length
        self.max_time = max_time

        ale = ALEInterface()
        # Use numpy's random state
        seed = np.random.randint(0, 2**16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)

        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))

        if render:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(rom_path))

        self.ale = ale
        self.__exceed_max = False
        self.legal_actions = ale.getMinimalActionSet()
        self.reset()
Esempio n. 12
0
    def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
                 frame_skip=4, treat_life_lost_as_terminal=True,
                 crop_or_scale='scale', max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)
        ale.loadROM(str.encode(rom_filename))

        assert ale.getFrameNumber() == 0


        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()
Esempio n. 13
0
    def _init_ale(self):
        ale = ALEInterface()
        ale.setBool('sound', self.play_sound)
        ale.setBool('display_screen', self.display_screen)
        ale.setInt('random_seed', self.random_seed)

        # Frame skip is implemented separately
        ale.setInt('frame_skip', 1)
        ale.setBool('color_averaging', False)
        ale.setFloat('repeat_action_probability', 0.0)
        # Somehow this repeat_action_probability has unexpected effect on game.
        # The larger this value is, the more frames games take to restart.
        # And when 1.0 games completely hang
        # We are setting the default value of 0.0 here, expecting that
        # it has no effect as frame_skip == 1
        # This action repeating is agent's concern
        # so we do not implement an equivalent in our wrapper.

        if self.record_screen_path:
            _LG.info('Recording screens: %s', self.record_screen_path)
            if not os.path.exists(self.record_screen_path):
                os.makedirs(self.record_screen_path)
            ale.setString('record_screen_dir', self.record_screen_path)

        if self.record_sound_filename:
            _LG.info('Recording sound: %s', self.record_sound_filename)
            record_sound_dir = os.path.dirname(self.record_sound_filename)
            if not os.path.exists(record_sound_dir):
                os.makedirs(record_sound_dir)
            ale.setBool('sound', True)
            ale.setString('record_sound_filename', self.record_sound_filename)

        ale.loadROM(self.rom_path)

        self._ale = ale
        self._actions = (ale.getMinimalActionSet() if self.minimal_action_set
                         else ale.getLegalActionSet())
Esempio n. 14
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 height_range=(None, None),
                 frame_skip=4,
                 image_shape=(84, 84),
                 nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = os.path.join(get_dataset_dir('atari_rom'), rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt("random_seed", self.rng.randint(0, 10000))
            self.ale.setBool("showinfo", False)

            self.ale.setInt("frame_skip", 1)
            self.ale.setBool('color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat('repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString('record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype(
            'float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
Esempio n. 15
0
class AtariEnv(object):
    def __init__(self,
                 frame_skip=None,
                 repeat_action_probability=0.0,
                 state_shape=[84, 84],
                 rom_path=None,
                 game_name='pong',
                 random_state=None,
                 rendering=False,
                 record_dir=None,
                 obs_showing=False,
                 channel_weights=[0.5870, 0.2989, 0.1140]):
        self.ale = ALEInterface()
        self.frame_skip = frame_skip
        self.state_shape = state_shape
        if random_state is None:
            random_state = np.random.RandomState(1234)
        self.rng = random_state
        self.channel_weights = channel_weights
        self.ale.setInt(b'random_seed', self.rng.randint(1000))
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)
        if rendering:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)
        if rendering and record_dir is not None:  # should be before loadROM
            self.ale.setString(b'record_screen_dir', record_dir.encode())
            self.ale.setString(b'record_sound_filename',
                               os.path.join(record_dir, '/sound.wav').encode())
            self.ale.setInt(b'fragsize',
                            64)  # to ensure proper sound sync (see ALE doc)
        self.ale.loadROM(str.encode(rom_path + game_name + '.bin'))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.nb_actions = len(self.legal_actions)
        (self.screen_width, self.screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((self.screen_height, self.screen_width, 3),
                                dtype=np.uint8)

        self.obs_showing = obs_showing

    def reset(self):
        self.ale.reset_game()
        return self.get_state()

    def step(self, action):
        reward = 0.0
        if self.frame_skip is None:
            num_steps = 1
        elif isinstance(self.frame_skip, int):
            num_steps = self.frame_skip
        else:
            num_steps = self.rng.randint(self.frame_skip[0],
                                         self.frame_skip[1])
        for i in range(num_steps):
            reward += self.ale.act(self.legal_actions[action])
        return self.get_state(), reward, self.ale.game_over(), {}

    def _get_image(self):
        self.ale.getScreenRGB(self._buffer)
        gray = self.channel_weights[0] * self._buffer[:, :, 0] + self.channel_weights[1] * self._buffer[:, :, 1] + \
           self.channel_weights[2] * self._buffer[:, :, 2]
        x = cv2.resize(gray,
                       tuple(self.state_shape),
                       interpolation=cv2.INTER_LINEAR)
        return x

    def get_state(self):
        return self._get_image()

    def get_lives(self):
        return self.ale.lives()
Esempio n. 16
0
class AtariEnvironment(interfaces.Environment):
    def __init__(self,
                 atari_rom,
                 frame_skip=4,
                 noop_max=30,
                 terminate_on_end_life=False,
                 random_seed=123,
                 frame_history_length=4,
                 use_gui=False,
                 max_num_frames=500000,
                 repeat_action_probability=0.0,
                 record_screen_dir=None):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', random_seed)
        self.ale.setInt('frame_skip', 1)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setInt('max_num_frames_per_episode', max_num_frames)
        if record_screen_dir is not None:
            self.ale.setString('record_screen_dir', record_screen_dir)
        self.ale.loadROM(atari_rom)
        self.frame_skip = frame_skip
        self.repeat_action_probability = repeat_action_probability
        self.noop_max = noop_max
        self.terminate_on_end_life = terminate_on_end_life
        self.current_lives = self.ale.lives()
        self.is_terminal = False
        self.previous_action = 0
        self.num_actions = len(self.ale.getMinimalActionSet())

        w, h = self.ale.getScreenDims()
        self.screen_width = w
        self.screen_height = h
        self.zero_last_frames = [
            np.zeros((84, 84), dtype=np.uint8),
            np.zeros((84, 84), dtype=np.uint8)
        ]
        self.last_two_frames = copy.copy(self.zero_last_frames)
        self.zero_history_frames = [
            np.zeros((84, 84), dtype=np.uint8)
            for i in range(0, frame_history_length)
        ]
        self.frame_history = copy.copy(self.zero_history_frames)
        atari_actions = self.ale.getMinimalActionSet()
        self.atari_to_onehot = dict(
            list(zip(atari_actions, list(range(len(atari_actions))))))
        self.onehot_to_atari = dict(
            list(zip(list(range(len(atari_actions))), atari_actions)))
        self.screen_image = np.zeros(self.screen_height * self.screen_width,
                                     dtype=np.uint8)

        self.use_gui = use_gui
        self.original_frame = np.zeros((h, w), dtype=np.uint8)
        self.refresh_time = datetime.timedelta(milliseconds=1000 / 60)
        self.last_refresh = datetime.datetime.now()
        if (self.use_gui):
            self.gui_screen = pygame.display.set_mode((w, h))

    def getRAM(self, ram=None):
        return self.ale.getRAM(ram)

    def _get_frame(self):
        self.ale.getScreenGrayscale(self.screen_image)
        image = self.screen_image.reshape(
            [self.screen_height, self.screen_width, 1])
        self.original_frame = image
        image = cv2.resize(image, (84, 84))
        return image

    def perform_action(self, onehot_index_action):
        if self.repeat_action_probability > 0:
            if np.random.uniform() < self.repeat_action_probability:
                onehot_index_action = self.previous_action
            self.previous_action = onehot_index_action
        action = self.onehot_to_atari[onehot_index_action]
        state, action, reward, next_state, self.is_terminal = self.perform_atari_action(
            action)
        return state, onehot_index_action, reward, next_state, self.is_terminal

    def perform_atari_action(self, atari_action):
        state = self.get_current_state()
        reward = self._act(atari_action, self.frame_skip)

        if self.use_gui:
            self.refresh_gui()

        self.frame_history[:-1] = self.frame_history[1:]
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)
        next_state = self.get_current_state()

        return state, atari_action, reward, next_state, self.is_terminal

    def _act(self, ale_action, repeat):
        reward = 0
        for i in range(repeat):
            reward += self.ale.act(ale_action)
            if i >= repeat - 2:
                self.last_two_frames = [
                    self.last_two_frames[1],
                    self._get_frame()
                ]

        self.is_terminal = self.ale.game_over()

        # terminate the episode if current_lives has decreased
        lives = self.ale.lives()
        if self.current_lives != lives:
            if self.current_lives > lives and self.terminate_on_end_life:
                self.is_terminal = True
            self.current_lives = lives

        return reward

    def get_current_state(self):
        #return copy.copy(self.frame_history)
        return [x.copy() for x in self.frame_history]

    def get_actions_for_state(self, state):
        return [
            self.atari_to_onehot[a] for a in self.ale.getMinimalActionSet()
        ]

    def reset_environment(self):
        self.last_two_frames = [self.zero_history_frames[0], self._get_frame()]

        if self.terminate_on_end_life:
            if self.ale.game_over():
                self.ale.reset_game()
        else:
            self.ale.reset_game()

        self.current_lives = self.ale.lives()

        if self.noop_max > 0:
            num_noops = np.random.randint(self.noop_max + 1)
            self._act(0, num_noops)

        self.previous_action = 0
        self.frame_history = copy.copy(self.zero_history_frames)
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)

        if self.use_gui:
            self.refresh_gui()

    def is_current_state_terminal(self):
        return self.is_terminal

    def refresh_gui(self):
        current_time = datetime.datetime.now()
        if (current_time - self.last_refresh) > self.refresh_time:
            self.last_refresh = current_time

            gui_image = np.tile(
                np.transpose(self.original_frame, axes=(1, 0, 2)), [1, 1, 3])
            # gui_image = np.zeros((self.screen_width, self.screen_height, 3), dtype=np.uint8)
            # channel = np.random.randint(3)
            # gui_image[:,:,channel] = np.transpose(self.original_frame, axes=(1, 0, 2))[:,:,0]

            pygame.surfarray.blit_array(self.gui_screen, gui_image)
            pygame.display.update()
Esempio n. 17
0
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        # OpenCV expects width as first and height as second
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def isTerminal(self):
        return self.ale.game_over()
Esempio n. 18
0
class ALEEnvironment():
    def __init__(self, config):
        self.history = History3D(config)
        self.history_length = config.history_length
        self.mode = config.mode
        self.life_lost = False
        self.terminal = False
        self.score = 0
        #cv2.namedWindow("Image")

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if config.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', False)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip',
                        config.frame_skip)  # Whether skip frames or not
        self.ale.setBool('color_averaging', config.color_averaging)

        if config.random_seed:  # Random seed for repeatable experiments.
            self.ale.setInt('random_seed', config.random_seed)

        if config.record_screen_path:
            if not os.path.exists(config.record_screen_path):
                os.makedirs(config.record_screen_path)
            self.ale.setString('record_screen_dir', config.record_screen_path)

        if config.record_sound_filename:
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               config.record_sound_filename)

        self.ale.loadROM(config.rom_file)

        if config.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.screen_width = config.screen_width
        self.screen_height = config.screen_height

    def numActions(self):
        return len(self.actions)

    def new_game(self):
        state, terminal = self.reset()
        for _ in range(self.history_length + 1):
            self.history.add(state)
        return state, terminal, list(range(len(self.actions)))

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test' or not self.life_lost or self.ale.game_over()):
            # `reset` called in a middle of episode  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        return self.getScreen(), self.isTerminal()

    def step(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        self.score += reward
        self.current_state = self.getScreen()
        self.history.add(self.current_state)
        self.terminal = self.isTerminal()
        return reward, self.history.get(), self.terminal

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        #print 'screen:\n',type(screen)
        #print 'screen.shape',screen.shape
        resized = cv2.resize(screen / 255.,
                             (self.screen_width, self.screen_height))

        #cv2.imshow("Image", screen)
        '''
		cv2.namedWindow("Image")
		cv2.destroyAllWindows()
		'''
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
Esempio n. 19
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
Esempio n. 20
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
Esempio n. 21
0
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        return len(self.actions)

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test' or not self.life_lost
                or  # `reset` called in a middle of episode
                self.ale.game_over()  # all lives are lost
            ):
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
Esempio n. 22
0
# Set USE_SDL to true to display the screen. ALE must be compilied
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
    pygame.init()
    ale.setBool('sound', False) # Sound doesn't work on OSX
  elif sys.platform.startswith('linux'):
    ale.setBool('sound', True)
  ale.setBool('display_screen', True)

recordPath = 'record'
ale.setString('record_screen_dir', recordPath)

# Load the ROM file
ale.loadROM(sys.argv[1])

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()
one_hot = {0:"1 0 0 0 0 0 0 0 0", 
  1:"0 1 0 0 0 0 0 0 0", 
  2:"0 0 1 0 0 0 0 0 0", 
  3:"0 0 0 1 0 0 0 0 0", 
  4:"0 0 0 0 1 0 0 0 0", 
  5:"0 0 0 0 0 1 0 0 0", 
  6:"0 0 0 0 0 0 1 0 0", 
  7:"0 0 0 0 0 0 0 1 0", 
  8:"0 0 0 0 0 0 0 0 1"}
Esempio n. 23
0
class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """

    def __init__(self, rom_file, viz=0,
                 frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width))
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def _reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def _step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
class Agent():
    def __init__(self, game, agent_type, display, load_model, record, test):
        self.name = game
        self.agent_type = agent_type
        self.ale = ALEInterface()
        self.ale.setInt(str.encode('random_seed'), np.random.randint(100))
        self.ale.setBool(str.encode('display_screen'), display or record)
        if record:
            self.ale.setString(str.encode('record_screen_dir'), str.encode('./data/recordings/{}/{}/tmp/'.format(game, agent_type)))

        self.ale.loadROM(str.encode('./roms/{}.bin'.format(self.name)))
        self.action_list = list(self.ale.getMinimalActionSet())
        self.frame_shape = np.squeeze(self.ale.getScreenGrayscale()).shape
        if test:
            self.name += '_test'

        if 'space_invaders' in self.name:
            # Account for blinking bullets
            self.frameskip = 2
        else:
            self.frameskip = 3

        self.frame_buffer = deque(maxlen=4)
        if load_model and not record:
            self.load_replaymemory()
        else:
            self.replay_memory = ReplayMemory(500000, 32)

        model_input_shape = self.frame_shape + (4,)
        model_output_shape = len(self.action_list)

        if agent_type == 'dqn':
            self.model = DeepQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )
        elif agent_type == 'double':
            self.model = DoubleDQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )

        else:
            self.model = DuelingDQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )

        print('{} Loaded!'.format(' '.join(self.name.split('_')).title()))
        print('Displaying: ', display)
        print('Frame Shape: ', self.frame_shape)
        print('Frame Skip: ', self.frameskip)
        print('Action Set: ', self.action_list)
        print('Model Input Shape: ', model_input_shape)
        print('Model Output Shape: ', model_output_shape)
        print('Agent: ', agent_type)

    def training(self, steps):
        '''
        Trains the agent for :steps number of weight updates.

        Returns the average model loss
        '''

        loss = []

        # Initialize frame buffer. np.squeeze removes empty dimensions e.g. if shape=(210,160,__)
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))

        try:
            for step in range(steps):
                gameover = False
                initial_state = np.stack(self.frame_buffer, axis=-1)
                action = self.model.predict_action(initial_state)

                # Backup data
                if step % 5000 == 0:
                    self.model.save_model()
                    self.model.save_hyperparams()
                    self.save_replaymemory()

                # If using a target model check for weight updates
                if hasattr(self.model, 'tau'):
                    if self.model.tau == 0:
                        self.model.update_target_model()
                        self.model.tau = 10000
                    else:
                        self.model.tau -= 1

                # Frame skipping technique https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/
                lives_before = self.ale.lives()
                for _ in range(self.frameskip):
                    self.ale.act(action)

                reward = self.ale.act(action)
                self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
                lives_after = self.ale.lives()

                if lives_after < lives_before:
                    gameover = True  # Taking advice from dude on reddit
                    reward = -1

                if self.ale.game_over():
                    gameover = True
                    reward = -1
                    self.ale.reset_game()

                new_state = np.stack(self.frame_buffer, axis=-1)

                # Experiment with clipping rewards for stability purposes
                reward = np.clip(reward, -1, 1)
                self.replay_memory.add(
                    initial_state,
                    action,
                    reward,
                    gameover,
                    new_state
                )

                loss += self.model.replay_train()
        except:
            self.model.save_model()
            self.model.save_hyperparams()
            self.save_replaymemory()
            raise KeyboardInterrupt

        return np.mean(loss, axis=0)

    def simulate_random(self):
        print('Simulating game randomly')
        done = False
        total_reward = 0
        while not done:
            action = np.random.choice(self.ale.getMinimalActionSet())
            reward = self.ale.act(action)
            total_reward += reward
            if self.ale.game_over():
                reward = -1
                done = True

            reward = np.clip(reward, -1, 1)
            if reward != 0:
                print(reward)

        frames_survived = self.ale.getEpisodeFrameNumber()
        self.ale.reset_game()
        return total_reward, frames_survived

    def simulate_intelligent(self, evaluating=False):
        done = False
        total_score = 0

        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        while not done:
            state = np.stack(self.frame_buffer, axis=-1)
            action = self.model.predict_action(state, evaluating)

            for _ in range(self.frameskip):
                self.ale.act(action)

            # Remember, ale.act returns the increase in game score with this action
            total_score += self.ale.act(action)

            # Pushes oldest frame out
            self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
            if self.ale.game_over():
                done = True

        frames_survived = self.ale.getEpisodeFrameNumber()
        print('   Game Over')
        print('   Frames Survived: ', frames_survived)
        print('   Score: ', total_score)
        print('===========================')
        self.ale.reset_game()
        return total_score, frames_survived

    def save_replaymemory(self):
        with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'wb') as f:
            pickle.dump(self.replay_memory, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Saved replay memory at ', datetime.now())

    def load_replaymemory(self):
        try:
            with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'rb') as f:
                self.replay_memory = pickle.load(f)
                print('Loaded replay memory at ', datetime.now())
        except FileNotFoundError:
            print('No replay memory file found')
            raise KeyboardInterrupt
Esempio n. 25
0
class AleEnvironment(Environment):
  def __init__(self, rom_name, record_display=False, show_display=False, id = 0, shrink=False, life_lost_as_end=True, use_grayscale=True):
    super(AleEnvironment, self).__init__()
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', int(np.random.rand() * 100))
    self.ale.setFloat('repeat_action_probability', 0.0)
    self.ale.setBool('color_averaging', False)
    self.record_display = record_display
    self.show_display = show_display

    if self.record_display:
      self.ale.setString('record_screen_dir', 'movie')
    elif self.show_display:
      self.display_name = rom_name + '_' + str(id)
      cv2.startWindowThread()
      cv2.namedWindow(self.display_name)

    self.ale.loadROM(rom_name)
    self.actions = self.ale.getMinimalActionSet()
    self.screen_width, self.screen_height = self.ale.getScreenDims()
    self.use_grayscale = use_grayscale
    if self.use_grayscale:
      self.screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8)
    else:
      self.screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
      self.prev_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
    self.shrink = shrink
    self.life_lost_as_end = life_lost_as_end
    self.lives_lost = False
    self.lives = self.ale.lives()


  def __enter__(self):
    return self


  def __exit__(self, exc_type, exc_value, traceback):
    cv2.destroyWindow(self.display_name)


  def act(self, action):
    reward = self.ale.act(self.actions[action])
    if self.use_grayscale:
      screen = self.ale.getScreenGrayscale(self.screen)
    else:
      current_screen = self.ale.getScreenRGB(self.screen)
      screen = np.maximum(current_screen, self.prev_screen)
      self.prev_screen = current_screen
      screen = screen[:, :, 0] * 0.2126 + screen[:, :, 1] * 0.0722 + screen[:, :, 2] * 0.7152
      screen = screen.astype(np.uint8)
    screen = np.reshape(screen, (self.screen_height, self.screen_width, 1))
    state = self.preprocess(screen)
    self.lives_lost = True if self.lives > self.ale.lives() else False
    self.lives = self.ale.lives()
    return reward, state


  def is_end_state(self):
    if self.life_lost_as_end:
      return self.ale.game_over() or self.lives_lost
    else:
      return self.ale.game_over()


  def reset(self):
    if self.ale.game_over():
      self.ale.reset_game()
    self.lives = self.ale.lives()
    self.lives_lost = False


  def available_actions(self):
    # return available indexes instead of actual action value
    return range(0, len(self.actions))


  def preprocess(self, screen):
    if self.show_display and not self.record_display:
      cv2.imshow(self.display_name, screen)

    if self.shrink:
      resized = cv2.resize(screen, (84, 84))
    else:
      resized = cv2.resize(screen, (84, 110))
      resized = resized[18:102, :]

    scaled = resized.astype(np.float32) / 255.0
    return scaled
Esempio n. 26
0
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        # Set ALE configuration
        self.ale.setInt(b'frame_skip', args.frame_skip)
        self.ale.setFloat(b'repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool(b'color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt(b'random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                os.makedirs(args.record_screen_path)
            self.ale.setString(b'record_screen_dir',
                               args.record_screen_path.encode())

        if args.record_sound_filename:
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_sound_filename',
                               args.record_sound_filename.encode())

        # Load ROM
        self.ale.loadROM(rom_file.encode())

        # Set game difficulty and mode (after loading)
        self.ale.setDifficulty(args.game_difficulty)
        self.ale.setMode(args.game_mode)

        # Whether to use minimum set or set
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        # Life lost control
        self.life_lost = False

        # Initialize base class
        super(ALEEnvironment, self).__init__(args)

    def action_dim(self):
        return len(self.actions)

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test' or not self.life_lost
                or  # `reset` called in a middle of episode
                self.ale.game_over()  # all lives are lost
            ):
            self.ale.reset_game()
        self.life_lost = False
        screen = self._get_state(self.ale.getScreenRGB())
        return screen

    def step(self, action, action_b=0, ignore_screen=False):
        lives = self.ale.lives()
        # Act on environment
        reward = self.ale.act(self.actions[action],
                              self.actions[action_b] + 18)
        # Check if life was lost
        self.life_lost = (not lives == self.ale.lives())
        # Check terminal state
        terminal = (self.ale.game_over() or self.life_lost
                    ) if self.mode == 'train' else self.ale.game_over()
        # Check if should ignore the screen (in case of RobotEnvironment)
        if ignore_screen:
            screen = None
        else:
            # Get screen from ALE
            screen = self._get_state(self.ale.getScreenRGB())
            # Wait for next frame to start
            self.fps_control.wait_next_frame()
        return screen, reward, terminal
Esempio n. 27
0
class ALEEnvironment(Environment):
  def __init__(self, rom_file, args):
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height

    self.life_lost = False

  def numActions(self):
    return len(self.actions)

  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
        self.mode == 'test' or
        not self.life_lost or  # `reset` called in a middle of episode
        self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False

  def act(self, action):
    lives = self.ale.lives()
    reward = self.ale.act(self.actions[action])
    self.life_lost = (not lives == self.ale.lives())
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, (self.screen_width, self.screen_height))
    return resized

  def isTerminal(self):
    if self.mode == 'train':
      return self.ale.game_over() or self.life_lost
    return self.ale.game_over()
Esempio n. 28
0
def test(session,
         hist_len=4,
         discount=0.99,
         act_rpt=4,
         upd_freq=4,
         min_sq_grad=0.01,
         epsilon=0.05,
         no_op_max=30,
         num_tests=1,
         learning_rate=0.0025,
         momentum=0.95,
         sq_momentum=0.95):
    #Create ALE object
    if len(sys.argv) < 3:
        print('Usage: %s rom_file record_screen_dir' % sys.argv[0])
        sys.exit()

    ale = ALEInterface()

    record_path = sys.argv[2]
    ale.setString('record_screen_dir', record_path)
    ale.setString('record_sound_filename', (record_path + '/sound.wav'))
    ale.setInt('fragsize', 64)
    cmd = 'mkdir '
    cmd += record_path
    os.system(cmd)

    # Get & Set the desired settings
    ale.setInt('random_seed', 123)
    #Changes repeat action probability from default of 0.25
    ale.setFloat('repeat_action_probability', 0.0)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(sys.argv[1])

    # create DQN agent
    # learning_rate and momentum are unused parameters (but needed)
    agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum,
                hist_len, len(ale.getMinimalActionSet()), None, discount,
                rom_name(sys.argv[1]))

    #Store the most recent two images
    preprocess_stack = deque([], 2)

    num_episodes = 0
    while num_episodes < num_tests:
        #initialize sequence with initial image
        seq = list()
        #We only have one image, we cannot combine two images
        perform_no_ops(ale, no_op_max, preprocess_stack, seq)
        #proc_seq.append(pp.preprocess(seq))
        total_reward = 0

        while not ale.game_over():
            state = get_state(seq, hist_len)
            action = agent.get_action_best_network(state, epsilon)
            #skip frames by repeating action
            reward = 0
            for i in range(act_rpt):
                reward = reward + ale.act(action)
                preprocess_stack.append(ale.getScreenRGB())
            seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))
            total_reward += reward
        print('Episode ended with score: %d' % (total_reward))
        num_episodes = num_episodes + 1
        ale.reset_game()
Esempio n. 29
0
class Environment:
    """docstring for Environment"""

    BUFFER_LEN = 2
    EPISODE_FRAMES = 18000
    EPOCH_COUNT = 200
    EPOCH_STEPS = 250000
    EVAL_EPS = 0.001
    FRAMES_SKIP = 4
    FRAME_HEIGHT = 84
    FRAME_WIDTH = 84
    MAX_NO_OP = 30
    MAX_REWARD = 1

    def __init__(self, rom_name, rng, display_screen=False):
        self.api = ALEInterface()
        self.api.setInt('random_seed', rng.randint(333))
        self.api.setBool('display_screen', display_screen)
        self.api.setFloat('repeat_action_probability', 0.0)
        self.rom_name = rom_name
        self.display_screen = display_screen
        self.rng = rng
        self.repeat = Environment.FRAMES_SKIP
        self.buffer_len = Environment.BUFFER_LEN
        self.height = Environment.FRAME_HEIGHT
        self.width = Environment.FRAME_WIDTH
        self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
        self.merge_id = 0
        self.max_reward = Environment.MAX_REWARD
        self.eval_eps = Environment.EVAL_EPS
        self.log_dir = ''
        self.network_dir = ''

        self.api.loadROM('../rom/' + self.rom_name)
        self.minimal_actions = self.api.getMinimalActionSet()
        original_width, original_height = self.api.getScreenDims()
        self.merge_frame = np.zeros(
            (self.buffer_len, original_height, original_width), dtype=np.uint8)

    def get_action_count(self):
        return len(self.minimal_actions)

    def train(self, agent, store_freq, folder=None, start_epoch=0):
        self._open_log_files(agent, folder)
        obs = np.zeros((self.height, self.width), dtype=np.uint8)
        epoch_count = Environment.EPOCH_COUNT

        for epoch in xrange(start_epoch, epoch_count):
            self.need_reset = True
            steps_left = Environment.EPOCH_STEPS

            print "\n" + "=" * 50
            print "Epoch #%d" % (epoch + 1)
            episode = 0
            train_start = time.time()
            while steps_left > 0:
                num_step, _ = self._run_episode(agent, steps_left, obs)
                steps_left -= num_step
                episode += 1
                if steps_left == 0 or episode % 10 == 0:
                    print "Finished episode #%d, steps_left = %d" \
                     % (episode, steps_left)
            train_end = time.time()

            valid_values = agent.get_validate_values()
            eval_values = self.evaluate(agent)
            test_end = time.time()

            train_time = train_end - train_start
            test_time = test_end - train_end
            step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
            print "\tFinished epoch #%d, episode trained = %d\n" \
             "\tValidate values = %.3f, evaluate reward = %.3f\n"\
             "\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
              % (epoch + 1, episode, valid_values, eval_values\
               , train_time, test_time, step_per_sec)

            self._update_log_files(agent, epoch + 1, episode, valid_values,
                                   eval_values, train_time, test_time,
                                   step_per_sec, store_freq)
            gc.collect()

    def evaluate(self, agent, episodes=30, obs=None):
        print "\n***Start evaluating"
        if obs is None:
            obs = np.zeros((self.height, self.width), dtype=np.uint8)
        sum_reward = 0.0
        sum_step = 0.0
        for episode in xrange(episodes):
            self.need_reset = True
            step, reward = self._run_episode(agent,
                                             self.episode_steps,
                                             obs,
                                             self.eval_eps,
                                             evaluating=True)
            sum_reward += reward
            sum_step += step
            print "Finished episode %d, reward = %d, step = %d" \
              % (episode + 1, reward, step)
        self.need_reset = True
        print "Average reward per episode = %.4f" % (sum_reward / episodes)
        print "Average step per episode = %.4f" % (sum_step / episodes)
        return sum_reward / episodes

    def _prepare_game(self):
        if self.need_reset or self.api.game_over():
            self.api.reset_game()
            self.need_reset = False
            if Environment.MAX_NO_OP > 0:
                num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
                   + self.buffer_len
                for _ in xrange(num_no_op):
                    self.api.act(0)

        for _ in xrange(self.buffer_len):
            self._update_buffer()

    def _run_episode(self, agent, steps_left, obs, eps=0.0, evaluating=False):
        self._prepare_game()

        start_lives = self.api.lives()
        step_count = 0
        sum_reward = 0
        is_terminal = False
        while step_count < steps_left and not is_terminal:
            self._get_screen(obs)
            action_id, _ = agent.get_action(obs, eps, evaluating)

            reward = self._repeat_action(self.minimal_actions[action_id])
            reward_clip = reward
            if self.max_reward > 0:
                reward_clip = np.clip(reward, -self.max_reward,
                                      self.max_reward)

            life_lost = not evaluating and self.api.lives() < start_lives
            is_terminal = self.api.game_over() or life_lost \
               or step_count + 1 >= steps_left

            agent.add_experience(obs, is_terminal, action_id, reward_clip,
                                 evaluating)
            sum_reward += reward
            step_count += 1

        return step_count, sum_reward

    def _update_buffer(self):
        self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
        self.merge_id = (self.merge_id + 1) % self.buffer_len

    def _repeat_action(self, action):
        reward = 0
        for i in xrange(self.repeat):
            reward += self.api.act(action)
            if i + self.buffer_len >= self.repeat:
                self._update_buffer()
        return reward

    def _get_screen(self, resized_frame):
        self._resize_frame(self.merge_frame.max(axis=0), resized_frame)

    def _resize_frame(self, src_frame, dst_frame):
        cv2.resize(src=src_frame,
                   dst=dst_frame,
                   dsize=(self.width, self.height),
                   interpolation=cv2.INTER_LINEAR)

    def _open_log_files(self, agent, folder):
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]

        if folder is not None:
            self.log_dir = folder
            self.network_dir = self.log_dir + '/network'
        else:
            self.log_dir = '../run_results/' + base_rom_name + time_str
            self.network_dir = self.log_dir + '/network'

        info_name = get_next_name(self.log_dir, 'info', 'txt')
        git_name = get_next_name(self.log_dir, 'git-diff', '')

        try:
            os.stat(self.log_dir)
        except OSError:
            os.makedirs(self.log_dir)

        try:
            os.stat(self.network_dir)
        except OSError:
            os.makedirs(self.network_dir)

        with open(os.path.join(self.log_dir, info_name), 'w') as f:
            f.write('Commit: ' +
                    subprocess.check_output(['git', 'rev-parse', 'HEAD']))
            f.write('Run command: ')
            f.write(' '.join(pipes.quote(x) for x in sys.argv))
            f.write('\n\n')
            f.write(agent.get_info())
            write_info(f, Environment)
            write_info(f, agent.__class__)
            write_info(f, agent.network.__class__)

        # From https://github.com/spragunr/deep_q_rl/pull/49/files
        with open(os.path.join(self.log_dir, git_name), 'w') as f:
            f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

        if folder is not None:
            return

        with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
            f.write("epoch,episode_train,validate_values,evaluate_reward"\
             ",train_time,test_time,steps_per_second\n")

        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
            f.write("epoch,available,free,buffers,cached"\
              ",available_readable,used_percent\n")
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
              (0, mem.available, mem.free, mem.buffers, mem.cached
              , bytes2human(mem.available), mem.percent))

    def _update_log_files(self, agent, epoch, episode, valid_values,
                          eval_values, train_time, test_time, step_per_sec,
                          store_freq):
        print "Updating log files"
        with open(self.log_dir + '/results.csv', 'a') as f:
            f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
               (epoch, episode, valid_values, eval_values
               , train_time, test_time, step_per_sec))

        mem = psutil.virtual_memory()
        with open(self.log_dir + '/memory.csv', 'a') as f:
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
              (epoch, mem.available, mem.free, mem.buffers, mem.cached
              , bytes2human(mem.available), mem.percent))

        agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

        if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
          (store_freq > 0 and (epoch % store_freq == 0)):
            agent.dump_exp(self.network_dir + '/exp.npz')

    def _setup_record(self, network_file):
        file_name, _ = os.path.splitext(os.path.basename(network_file))
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        img_dir = os.path.dirname(network_file) + '/images_' \
           + file_name + time_str
        rom_name, _ = os.path.splitext(self.rom_name)
        out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
           + file_name + time_str + '.mov'
        print out_name

        try:
            os.stat(img_dir)
        except OSError:
            os.makedirs(img_dir)

        self.api.setString('record_screen_dir', img_dir)
        self.api.loadROM('../rom/' + self.rom_name)
        return img_dir, out_name

    def record_run(self, agent, network_file, episode_id=1):
        if episode_id > 1:
            self.evaluate(agent, episode_id - 1)
            system_state = self.api.cloneSystemState()

        img_dir, out_name = self._setup_record(network_file)

        if episode_id > 1:
            self.api.restoreSystemState(system_state)

        self.evaluate(agent, 1)
        script = \
          """
					{
						ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					} || {
						avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					}
				""" % (img_dir, out_name, img_dir, out_name)
        os.system(script)
Esempio n. 30
0
class Environment:
	"""docstring for Environment"""

	BUFFER_LEN = 2
	EPISODE_FRAMES = 18000
	EPOCH_COUNT = 200
	EPOCH_STEPS = 250000
	EVAL_EPS = 0.001
	FRAMES_SKIP = 4
	FRAME_HEIGHT = 84
	FRAME_WIDTH = 84
	MAX_NO_OP = 30
	MAX_REWARD = 1
	
	def __init__(self, rom_name, rng, display_screen = False):
		self.api = ALEInterface()
		self.api.setInt('random_seed', rng.randint(333))
		self.api.setBool('display_screen', display_screen)
		self.api.setFloat('repeat_action_probability', 0.0)
		self.rom_name = rom_name
		self.display_screen = display_screen
		self.rng = rng
		self.repeat = Environment.FRAMES_SKIP
		self.buffer_len = Environment.BUFFER_LEN
		self.height = Environment.FRAME_HEIGHT
		self.width = Environment.FRAME_WIDTH
		self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
		self.merge_id = 0
		self.max_reward = Environment.MAX_REWARD
		self.eval_eps = Environment.EVAL_EPS
		self.log_dir = ''
		self.network_dir = ''

		self.api.loadROM('../rom/' + self.rom_name)
		self.minimal_actions = self.api.getMinimalActionSet()
		original_width, original_height = self.api.getScreenDims()
		self.merge_frame = np.zeros((self.buffer_len
								, original_height
								, original_width)
								, dtype = np.uint8)

	def get_action_count(self):
		return len(self.minimal_actions)

	def train(self, agent, store_freq, folder = None, start_epoch = 0):
		self._open_log_files(agent, folder)
		obs = np.zeros((self.height, self.width), dtype = np.uint8)
		epoch_count = Environment.EPOCH_COUNT

		for epoch in xrange(start_epoch, epoch_count):
			self.need_reset = True
			steps_left = Environment.EPOCH_STEPS

			print "\n" + "=" * 50
			print "Epoch #%d" % (epoch + 1)
			episode = 0
			train_start = time.time()
			while steps_left > 0:
				num_step, _ = self._run_episode(agent, steps_left, obs)
				steps_left -= num_step
				episode += 1
				if steps_left == 0 or episode % 10 == 0:
					print "Finished episode #%d, steps_left = %d" \
						% (episode, steps_left)
			train_end = time.time()

			valid_values = agent.get_validate_values()
			eval_values = self.evaluate(agent)
			test_end = time.time()

			train_time = train_end - train_start
			test_time = test_end - train_end
			step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
			print "\tFinished epoch #%d, episode trained = %d\n" \
				"\tValidate values = %.3f, evaluate reward = %.3f\n"\
				"\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
					% (epoch + 1, episode, valid_values, eval_values\
						, train_time, test_time, step_per_sec)

			self._update_log_files(agent, epoch + 1, episode
								, valid_values, eval_values
								, train_time, test_time
								, step_per_sec, store_freq)
			gc.collect()

	def evaluate(self, agent, episodes = 30, obs = None):
		print "\n***Start evaluating"
		if obs is None:
			obs = np.zeros((self.height, self.width), dtype = np.uint8)
		sum_reward = 0.0
		sum_step = 0.0
		for episode in xrange(episodes):
			self.need_reset = True
			step, reward = self._run_episode(agent, self.episode_steps, obs
											, self.eval_eps, evaluating = True)
			sum_reward += reward
			sum_step += step
			print "Finished episode %d, reward = %d, step = %d" \
					% (episode + 1, reward, step)
		self.need_reset = True
		print "Average reward per episode = %.4f" % (sum_reward / episodes)
		print "Average step per episode = %.4f" % (sum_step / episodes)
		return sum_reward / episodes

	def _prepare_game(self):
		if self.need_reset or self.api.game_over():
			self.api.reset_game()
			self.need_reset = False
			if Environment.MAX_NO_OP > 0:
				num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
							+ self.buffer_len
				for _ in xrange(num_no_op):
					self.api.act(0)

		for _ in xrange(self.buffer_len):
			self._update_buffer()

	def _run_episode(self, agent, steps_left, obs
					, eps = 0.0, evaluating = False):
		self._prepare_game()

		start_lives = self.api.lives()
		step_count = 0
		sum_reward = 0
		is_terminal = False
		while step_count < steps_left and not is_terminal:
			self._get_screen(obs)
			action_id, _ = agent.get_action(obs, eps, evaluating)
			
			reward = self._repeat_action(self.minimal_actions[action_id])
			reward_clip = reward
			if self.max_reward > 0:
				reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

			life_lost = not evaluating and self.api.lives() < start_lives
			is_terminal = self.api.game_over() or life_lost \
						or step_count + 1 >= steps_left

			agent.add_experience(obs, is_terminal, action_id, reward_clip
								, evaluating)
			sum_reward += reward
			step_count += 1
			
		return step_count, sum_reward

	def _update_buffer(self):
		self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
		self.merge_id = (self.merge_id + 1) % self.buffer_len

	def _repeat_action(self, action):
		reward = 0
		for i in xrange(self.repeat):
			reward += self.api.act(action)
			if i + self.buffer_len >= self.repeat:
				self._update_buffer()
		return reward

	def _get_screen(self, resized_frame):
		self._resize_frame(self.merge_frame.max(axis = 0), resized_frame)
				
	def _resize_frame(self, src_frame, dst_frame):
		cv2.resize(src = src_frame, dst = dst_frame,
					dsize = (self.width, self.height),
					interpolation = cv2.INTER_LINEAR)

	def _open_log_files(self, agent, folder):
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]


		if folder is not None:
			self.log_dir = folder
			self.network_dir = self.log_dir + '/network'
		else:
			self.log_dir = '../run_results/' + base_rom_name + time_str
			self.network_dir = self.log_dir + '/network'

		info_name = get_next_name(self.log_dir, 'info', 'txt')
		git_name = get_next_name(self.log_dir, 'git-diff', '')

		try:
			os.stat(self.log_dir)
		except OSError:
			os.makedirs(self.log_dir)

		try:
			os.stat(self.network_dir)
		except OSError:
			os.makedirs(self.network_dir)

		with open(os.path.join(self.log_dir, info_name), 'w') as f:
			f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse'
														, 'HEAD']))
			f.write('Run command: ')
			f.write(' '.join(pipes.quote(x) for x in sys.argv))
			f.write('\n\n')
			f.write(agent.get_info())
			write_info(f, Environment)
			write_info(f, agent.__class__)
			write_info(f, agent.network.__class__)

		# From https://github.com/spragunr/deep_q_rl/pull/49/files
		with open(os.path.join(self.log_dir, git_name), 'w') as f:
			f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

		if folder is not None:
			return

		with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
			f.write("epoch,episode_train,validate_values,evaluate_reward"\
				",train_time,test_time,steps_per_second\n")

		mem = psutil.virtual_memory()
		with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
			f.write("epoch,available,free,buffers,cached"\
					",available_readable,used_percent\n")
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(0, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

	def _update_log_files(self, agent, epoch, episode, valid_values
						, eval_values, train_time, test_time, step_per_sec
						, store_freq):
		print "Updating log files"
		with open(self.log_dir + '/results.csv', 'a') as f:
			f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
						(epoch, episode, valid_values, eval_values
						, train_time, test_time, step_per_sec))

		mem = psutil.virtual_memory()
		with open(self.log_dir + '/memory.csv', 'a') as f:
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(epoch, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

		agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

		if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
				(store_freq > 0 and (epoch % store_freq == 0)):
			agent.dump_exp(self.network_dir + '/exp.npz')

	def _setup_record(self, network_file):
		file_name, _ = os.path.splitext(os.path.basename(network_file))
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		img_dir = os.path.dirname(network_file) + '/images_' \
					+ file_name + time_str
		rom_name, _ = os.path.splitext(self.rom_name)
		out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
					+ file_name + time_str + '.mov'
		print out_name

		try:
			os.stat(img_dir)
		except OSError:
			os.makedirs(img_dir)

		self.api.setString('record_screen_dir', img_dir)
		self.api.loadROM('../rom/' + self.rom_name)
		return img_dir, out_name

	def record_run(self, agent, network_file, episode_id = 1):
		if episode_id > 1:
			self.evaluate(agent, episode_id - 1)
			system_state = self.api.cloneSystemState()

		img_dir, out_name = self._setup_record(network_file)

		if episode_id > 1:
			self.api.restoreSystemState(system_state)

		self.evaluate(agent, 1)
		script = \
				"""
					{
						ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					} || {
						avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					}
				""" % (img_dir, out_name, img_dir, out_name)
		os.system(script)
Esempio n. 31
0
class AtariEnvironment:
    def __init__(self, seed=1, record=False):
        self.ale = ALEInterface()
        self.ale.setBool(b'display_screen', FLAGS.display_screen or record)
        self.ale.setInt(b'frame_skip', 1)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'random_seed', seed)
        self.ale.setFloat(b'repeat_action_probability', FLAGS.sticky_prob)
        self.ale.setInt(b'max_num_frames_per_episode', FLAGS.max_num_frames_per_episode)

        if record:
            if not tf.gfile.Exists(FLAGS.record_dir):
                tf.gfile.MakeDirs(FLAGS.record_dir)
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_screen_dir', str.encode(FLAGS.record_dir))
            self.ale.setString(b'record_sound_filename', str.encode(FLAGS.record_dir + '/sound.wav'))
            self.ale.setInt(b'fragsize', 64)

        self.ale.loadROM(str.encode(FLAGS.rom))

        self.ale.setMode(FLAGS.mode)
        self.ale.setDifficulty(FLAGS.difficulty)

        self.action_set = self.ale.getLegalActionSet()

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(FLAGS.frame_buffer_size, screen_dims, np.uint8)

        self.reset()

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, 3)

        return expanded_frame

    def reset(self):
        self._episode_frames = 0
        self._episode_reward = 0

        self.ale.reset_game()
        for _ in range(FLAGS.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(FLAGS.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._episode_frames += FLAGS.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self._get_single_frame(), self._is_terminal()

    def state(self):
        assert len(self._frame_buffer) == FLAGS.frame_buffer_size
        return self._get_single_frame()

    def num_actions(self):
        return len(self.action_set)

    def episode_reward(self):
        return self._episode_reward

    def episode_frames(self):
        return self._episode_frames

    def frame_skip(self):
        return FLAGS.frame_skip
Esempio n. 32
0
# Set USE_SDL to true to display the screen. ALE must be compilied
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
  if sys.platform == 'darwin':
    import pygame
    pygame.init()
    ale.setBool('sound', False) # Sound doesn't work on OSX
  elif sys.platform.startswith('linux'):
    ale.setBool('sound', True)
  ale.setBool('display_screen', True)
  
exp_path =  "records/exp{}".format(sys.argv[2])
os.mkdir(exp_path)
ale.setString("record_screen_dir", exp_path)

# Load the ROM file
ale.loadROM(sys.argv[1])
print sys.argv[1]
#ale.loadROM("../../Roms/ROMS/montezuma_revenge.bin")

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()
one_hot = {0:"1 0 0 0 0 0 0 0 0", 
  2:"0 1 0 0 0 0 0 0 0", 
  3:"0 0 1 0 0 0 0 0 0", 
  4:"0 0 0 1 0 0 0 0 0", 
  5:"0 0 0 0 1 0 0 0 0", 
  6:"0 0 0 0 0 1 0 0 0", 
  7:"0 0 0 0 0 0 1 0 0", 
Esempio n. 33
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:]
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        try:
            return {'avg_score': np.mean(self.stats['score']),
                    'max_score': float(np.max(self.stats['score'])) }
        except ValueError:
            return {}