Example #1
    def __init__(self, rom_file, random_seed, frame_skip, repeat_action_probability, minimum_actions, use_sdl,
                 test_mode, image_processing=None):
        ALEInterface.__init__(self)
        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        if use_sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.setBool('sound', True)
                self.setBool('display_screen', True)

        self.setFloat('repeat_action_probability', repeat_action_probability)
        self.setInt('frame_skip', frame_skip)

        self.random_seed = random_seed
        self.frame_skip = frame_skip
        self.minimum_actions = minimum_actions
        self.test_mode = test_mode
        self.image_processing = image_processing
        self.num_actions = 0
        self.legal_actions = []
        self.queue = deque()
        self.height = -1
        self.width = -1

        self.loadROM(rom_file)

        height, width = self.getScreenDims()
        logging.info('Screen resolution is %dx%d' % (height, width))
Example #2
class Emulator(object):
    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        self.ale = ALEInterface()
        self.max_num_frames_per_episode = 100000 #self.ale.getInt('max_num_frames_per_episode')
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        return self.ale.act(action)

    def terminal(self):
        return self.ale.game_over()
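
A minimal random-play loop over this Emulator might look like the following sketch; the breakout.bin filename is an assumption.

import random

# Hypothetical usage; assumes roms/breakout.bin exists on disk.
emulator = Emulator('breakout.bin')
emulator.reset()
total_reward = 0
while not emulator.terminal():
    action = random.choice(emulator.actions)  # uniform draw from the minimal action set
    total_reward += emulator.act(action)
    frame = emulator.image()                  # 84x84 grayscale observation
print('Episode reward:', total_reward)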
Example #3
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of a life as end of episode. Useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()
Example #4
def map_game_to_ALE(game_name, interactive):
    game_path = '/cvgl/u/nishith/MultiTaskRL/libs/DQN_ale/roms/' \
                + game_name + '.bin'
    print(game_path)
    game = ALEInterface()
    if interactive:
        setup_display(game)
    game.loadROM(game_path)
    return game
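
A short usage sketch; 'breakout' is a placeholder game name and interactive=False skips the SDL setup.

# Hypothetical call; assumes breakout.bin exists in the hard-coded roms directory.
game = map_game_to_ALE('breakout', interactive=False)
actions = game.getMinimalActionSet()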
Example #5
 def act_with_frame_skip(self, a):
     reward = 0
     game_over = False
     lives = ALEInterface.lives(self)
     for _ in range(self.frame_skip):
         reward += ALEInterface.act(self, self.legal_actions[a])
         if ALEInterface.game_over(self) or (not self.test_mode and ALEInterface.lives(self) < lives):
             game_over = True
     return reward, game_over
Example #6
def peekActionSize(rom):
  if args.use_gym:
    import gym
    env = gym.make(args.gym_env)
    return env.action_space.n
  else:
    from ale_python_interface import ALEInterface
    ale = ALEInterface()
    ale.loadROM(rom.encode('ascii'))
    return len(ale.getMinimalActionSet())
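
Note that peekActionSize reads a module-level args namespace despite taking rom as a parameter; a hedged call sketch:

# Hypothetical call; assumes an argparse-style `args` with use_gym=False is
# already defined at module level, and the ROM path is a placeholder.
n_actions = peekActionSize('/path/to/breakout.bin')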
Example #7
 def loadROM(self, rom_file):
     ALEInterface.loadROM(self, rom_file)
     if self.minimum_actions:
         self.legal_actions = self.getMinimalActionSet()
     else:
         self.legal_actions = self.getLegalActionSet()
     self.num_actions = len(self.legal_actions)
     self.setInt('frame_skip', self.frame_skip)
     if self.random_seed is not None:
         self.setInt('random_seed', self.random_seed)
     self.height, self.width = self.getScreenDims()
Example #8
class AtariMDP(MDP, Serializable):

    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)
        
        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)        
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip


    def get_image(self):
        return to_rgb(self.ale)
    def get_ram(self):
        return to_ram(self.ale)
    def game_over(self):
        return self.ale.game_over()
    def reset_game(self):
        return self.ale.reset_game()

    @property
    def n_actions(self):
        return len(self.action_set)

    def get_obs(self):
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None,:]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None,:,:,:]

    def step(self, a):

        reward = 0.0
        action = self.action_set[a]
        for _ in range(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1,-1)
        return ob, np.array([reward]), self.ale.game_over()

    # return: (states, observations)
    def reset(self):
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        return self._action_set

    def plot(self):
        import cv2
        cv2.imshow("atarigame",self.get_image()) #pylint: disable=E1101
        cv2.waitKey(10) #pylint: disable=E1101
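
A hedged rollout sketch for AtariMDP; the ROM path is a placeholder and OBS_RAM comes from the module that defines the class.

import numpy as np

# Hypothetical rollout with random actions.
mdp = AtariMDP('/path/to/pong.bin', obs_type=OBS_RAM, frame_skip=4)
ob = mdp.reset()
while not mdp.game_over():
    a = np.random.randint(mdp.n_actions)   # random action index
    ob, reward, done = mdp.step(a)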
Example #9
def init():

  pygame.init()
  rom_path = '/Users/maciej/Development/atari-roms'
  ale = ALEInterface()
  ale.setInt('random_seed', 123)
  ale.setInt('frame_skip', 1)
  ale.loadROM(rom_path + '/space_invaders.bin')
  ale.setFloat("repeat_action_probability", 0)
  return ale
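
A hedged random-play loop built on init() above:

from random import choice

ale = init()
actions = ale.getMinimalActionSet()
while not ale.game_over():
    ale.act(choice(actions))   # uniform random action each step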
Example #10
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #
        self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()
Example #11
	def __init__(self, rom_name, rng, display_screen = False):
		self.api = ALEInterface()
		self.api.setInt('random_seed', rng.randint(333))
		self.api.setBool('display_screen', display_screen)
		self.api.setFloat('repeat_action_probability', 0.0)
		self.rom_name = rom_name
		self.display_screen = display_screen
		self.rng = rng
		self.repeat = Environment.FRAMES_SKIP
		self.buffer_len = Environment.BUFFER_LEN
		self.height = Environment.FRAME_HEIGHT
		self.width = Environment.FRAME_WIDTH
		self.episode_steps = Environment.EPISODE_FRAMES // Environment.FRAMES_SKIP
		self.merge_id = 0
		self.max_reward = Environment.MAX_REWARD
		self.eval_eps = Environment.EVAL_EPS
		self.log_dir = ''
		self.network_dir = ''

		self.api.loadROM('../rom/' + self.rom_name)
		self.minimal_actions = self.api.getMinimalActionSet()
		original_width, original_height = self.api.getScreenDims()
		self.merge_frame = np.zeros((self.buffer_len
								, original_height
								, original_width)
								, dtype = np.uint8)
Example #12
    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, 
                 ale_options=[{"key": "random_seed", "value": 0}, 
                              {"key": "color_averaging", "value": True},
                              {"key": "repeat_action_probability", "value": 0.}]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()
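
The ale_options list is dispatched to setInt/setFloat/setBool by the Python type of each value; a hedged construction sketch, with MyALEEnv standing in for the unnamed class above:

import numpy as np

# Hypothetical instantiation; the int 123 goes through setInt and the
# float 0.25 through setFloat.
rng = np.random.RandomState(0)
env = MyALEEnv(rng, rom="ale/breakout.bin",
               ale_options=[{"key": "random_seed", "value": 123},
                            {"key": "repeat_action_probability", "value": 0.25}])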
Example #13
    def __init__(self):
    
        self.ale = ALEInterface()
        
        # turn off the sound
        self.ale.setBool('sound', False)
        
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)


        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
Example #14
  def __init__(self, rand_seed, display=False):
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', rand_seed)

    if display:
      self._setup_display()
    
    self.ale.loadROM(ROM)

    # height=210, width=160
    self.screen = np.empty((210, 160, 1), dtype=np.uint8)
    
    no_action = 0
    
    self.reward = self.ale.act(no_action)
    self.terminal = self.ale.game_over()

    # screen shape is (210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # reshape to (210, 160)
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # resize to height=110, width=84
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    
    x_t = resized_screen[18:102,:]
    x_t = x_t.astype(np.float32)
    x_t *= (1.0/255.0)
    self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)

    # collect only the actions we will actually use (the minimal action set)
    self.real_actions = self.ale.getMinimalActionSet()
Example #15
	def __init__(self, args):
		''' Initialize Atari environment '''

		# Parameters
		self.buffer_length = args.buffer_length
		self.screen_dims = args.screen_dims
		self.frame_skip = args.frame_skip
		self.blend_method = args.blend_method
		self.reward_processing = args.reward_processing
		self.max_start_wait = args.max_start_wait
		self.history_length = args.history_length
		self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip)

		#Initialize ALE instance
		self.ale = ALEInterface()
		self.ale.setFloat(b'repeat_action_probability', 0.0)
		if args.watch:
			self.ale.setBool(b'sound', True)
			self.ale.setBool(b'display_screen', True)
		self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

		self.buffer = np.empty((self.buffer_length, 210, 160))
		self.current = 0
		self.action_set = self.ale.getMinimalActionSet()
		self.lives = self.ale.lives()

		self.reset()
Example #16
 def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'):
     self.ale = ALEInterface()
     if live:
         USE_SDL = True
         if USE_SDL:
             if sys.platform == 'darwin':
                 import pygame
                 pygame.init()
                 self.ale.setBool('sound', False) # Sound doesn't work on OSX
             elif sys.platform.startswith('linux'):
                 self.ale.setBool('sound', True)
         self.ale.setBool('display_screen', True)
     self.mode = mode
     self.live = live
     self.ale.loadROM(rom_path)
     self.num_frames = num_frames
     self.frames = []
     self.frame_id = 0
     self.cum_reward = 0
     self.skip_frame = skip_frame
     if mode == 'small':
         img = T.matrix('img')
         self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
         self.img_shape = (16, 16)
     else:
         self.img_shape = (84, 84) # image shape according to DQN Nature paper.
     while len(self.frames) < 4:
         self.step(choice(self.valid_actions, 1)[0])
     self.reset()
Example #17
  def __init__(self, rom_file, display_screen=False, frame_skip=4,
               screen_height=84, screen_width=84, repeat_action_probability=0,
               color_averaging=True, random_seed=0,
               record_screen_path='screen_pics', record_sound_filename=None,
               minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width,screen_height)
Example #18
    def __init__(self, rom_file, frame_skip=1, viz=0):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames
        :param viz: the delay. visualize the game while running. 0 to disable
        """
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(self.rng.randint(0, 1000)))
        self.ale.setInt("frame_skip", frame_skip)
        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        self.romname = os.path.basename(rom_file)
        if self.viz and isinstance(self.viz, float):
            cv2.startWindowThread()
            cv2.namedWindow(self.romname)

        self._reset()
        self.last_image = self._grab_raw_image()
        self.framenum = 0
Example #19
 def __init__(self):
     self.ale = ALEInterface()
     self.ale.setInt('random_seed', 123)
     self.ale.setBool("display_screen", False)
     self.ale.setBool("sound", False)
     self.ale.loadROM("%s/breakout.bin" % rom_directory)
     self.current_state = [
         self.ale.getScreenRGB(), self.ale.getScreenRGB()
     ]
Example #20
 def __init__(self):
     self._ale = ALEInterface()
     self._ale.setInt('random_seed', 123)
     self._ale.setFloat('repeat_action_probability', 0.0)
     self._ale.setBool('color_averaging', False)
     self._ale.loadROM('roms/enduro.bin')
     self._controller = Controller(self._ale)
     self._extractor = StateExtractor(self._ale)
     self._image = None
Example #21
 def __init__(self, settings):
     self.ale = ALEInterface()
     self.ale.setInt('frame_skip', settings['frame_skip'])
     self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
     self.ale.setBool('color_averaging', False)
     self.ale.loadROM('roms/' + settings['rom_name'])
     self.actions = self.ale.getMinimalActionSet()
     self.width = settings['screen_width']
     self.height = settings['screen_height']
Example #22
 def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
     Serializable.__init__(self, rom_path, obs_type, frame_skip)
     self.options = (rom_path, obs_type, frame_skip)
     
     self.ale = ALEInterface()
     self.ale.loadROM(rom_path)        
     self._rom_path = rom_path
     self._obs_type = obs_type
     self._action_set = self.ale.getMinimalActionSet()
     self.frame_skip = frame_skip
Example #23
 def __init__(self, show_screen, history_length):
     self.ale = ALEInterface()
     self.ale.setInt('frame_skip', 4)
     self.history = None
     self.history_length = history_length
     if show_screen:
         self.display_screen()
     self.load_game()
     (screen_width, screen_height) = self.ale.getScreenDims()
     self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8)  # 210x160 screen data
     self.dims = (84, 84)  # input size for neural network
     self.actions = [3, 0, 1, 4]  # right, noop, fire, left
Example #24
    def __init__(self, processing_cls, game_rom=None, encoder_model=None,
                 encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model, path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)   # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)   # no sound

            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )
Example #25
	def __init__(self,rom_name):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('./' +rom_name)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		#print len(self.legal_actions)
		self.windowname = rom_name
Example #26
    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False) # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        self.ale.loadROM(str.encode(rom_file))
Example #27
    def init(self,rom_file,ale_frame_skip):

        self.ale = ALEInterface()

        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setBool("color_averaging", False)  # original used disable_color_averaging=1
        self.ale.setInt("frame_skip", ale_frame_skip)

        self.ale.loadROM(rom_file)
        self.legal_actions = self.ale.getMinimalActionSet()
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size),dtype=np.uint8)
        self.ale.getRAM(self.ram)

        self.state = self.ale.getRAM(self.ram)
Example #28
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For full control over explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes
Example #29
  def __init__(self, rand_seed, display=False, no_op_max=7):
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', rand_seed)
    self._no_op_max = no_op_max

    if display:
      self._setup_display()
    
    self.ale.loadROM(ROM)

    # collect minimal action set
    self.real_actions = self.ale.getMinimalActionSet()

    # height=210, width=160
    self._screen = np.empty((210, 160, 1), dtype=np.uint8)

    self.reset()
Example #30
  def __init__(self, rom_file, args):
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height

    self.life_lost = False
Example #31
#!/usr/bin/env python
# python_example.py
# Author: Ben Goodrich
#
# This is a direct port to python of the shared library example from
# ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp
import sys
from random import randrange
from ale_python_interface import ALEInterface

if len(sys.argv) < 2:
    print('Usage:', sys.argv[0], 'rom_file')
    sys.exit()

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt('random_seed', 123)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
    ale.setBool('display_screen', True)
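
The shared-library example this script ports continues by loading the ROM and playing random episodes; a sketch of that continuation:

ale.loadROM(sys.argv[1])

# Play a few episodes with a uniformly random policy.
legal_actions = ale.getLegalActionSet()
for episode in range(10):
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        total_reward += ale.act(a)
    print('Episode', episode, 'ended with score:', total_reward)
    ale.reset_game()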
Example #32
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        return self.ale.game_over()
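
A hedged driver loop for ALEEnvironment; `args` is the same argparse-style namespace the constructor reads, and the ROM path is a placeholder.

import numpy as np

env = ALEEnvironment('/path/to/breakout.bin', args)
env.restart()
while not env.isTerminal():
    reward = env.act(np.random.randint(env.numActions()))
    frame = env.getScreen()   # resized (screen_height, screen_width) grayscale frame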
Example #33
# random_boxing.py
# Two random agents playing against each other Boxing with different frame skips
# to evaluate which impact frame skip has on the game.
# Author: Jens Roewekamp
#
import sys
import os

# check if ALE_PATH exists and add it to the path if it does
if os.environ.get('ALE_PATH'):
    sys.path.append(os.environ['ALE_PATH'])

from random import randrange
from ale_python_interface import ALEInterface

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt('random_seed', 123)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = False
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
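
The script is truncated here; a hedged sketch of how it might continue for one frame-skip setting, assuming boxing.bin sits in the working directory:

ale.setInt('frame_skip', 4)   # hypothetical frame-skip value under test
ale.loadROM('boxing.bin')
legal_actions = ale.getMinimalActionSet()
total_reward = 0
while not ale.game_over():
    total_reward += ale.act(legal_actions[randrange(len(legal_actions))])
print('Random episode reward:', total_reward)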
Example #34
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: consider loss of a life as end of episode. Useful for training.
        """
        super(AtariPlayer, self).__init__()
        self.ale = ALEInterface()
        self.rng = get_rng(self)

        self.ale.setInt("random_seed", self.rng.randint(0, 10000))
        self.ale.setBool("showinfo", False)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('color_averaging', False)
        # manual.pdf suggests otherwise. may need to check
        self.ale.setFloat('repeat_action_probability', 0.0)

        # viz setup
        if isinstance(viz, six.string_types):
            assert os.path.isdir(viz), viz
            self.ale.setString('record_screen_dir', viz)
            viz = 0
        if isinstance(viz, int):
            viz = float(viz)
        self.viz = viz
        if self.viz and isinstance(self.viz, float):
            self.windowname = os.path.basename(rom_file)
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

        self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:]
        # 0.299, 0.587, 0.114, same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_num_actions(self):
        """
        :returns: the number of legal actions
        """
        return len(self.actions)

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

    def get_stat(self):
        try:
            return {'avg_score': np.mean(self.stats['score']),
                    'max_score': float(np.max(self.stats['score'])) }
        except ValueError:
            return {}
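
A hedged interaction loop for AtariPlayer; the ROM path is a placeholder.

# Hypothetical usage; action() already restarts the episode on game over.
player = AtariPlayer('/path/to/breakout.bin', viz=0)
for _ in range(100):
    act = player.rng.randint(player.get_num_actions())
    reward, is_over = player.action(act)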
Example #35
class Environment:
    def __init__(self, render=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

    def set_render(self, render):
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = screen[18:102, :]
        screen = screen.astype(np.float32)
        screen /= 255.0

        self.frame_buffer = np.stack((screen, screen, screen, screen), axis=2)
        return self.frame_buffer

    def act(self, action):

        reward = self.ale.act(4 + action)
        done = self.ale.game_over()

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = np.reshape(screen[18:102, :], (84, 84, 1))
        screen = screen.astype(np.float32)
        screen *= (1 / 255.0)

        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:],
                                      screen,
                                      axis=2)

        return self.frame_buffer, reward, done, ""

    def close(self):
        self.ale.setBool(b'display_screen', False)
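
A hedged episode loop; act() above offsets the action index by 4, so the valid index range here is an assumption.

import numpy as np

env = Environment(render=False)
obs = env.reset()                  # (84, 84, 4) stacked frames
done = False
while not done:
    obs, reward, done, _ = env.act(np.random.randint(0, 3))
env.close()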
Example #36
class AtariEnvironment:
    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        self.ale = ALEInterface()
        self.ale.setBool(b"display_screen", cfg.display_screen)
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))

        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        # self.action_set = self.ale.getMinimalActionSet()
        assert len(self.action_set) == cfg.num_actions

        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1, )
        self._frame_buffer = CircularBuffer(cfg.frame_buffer_size, screen_dims,
                                            np.uint8)
        self._frame_stack = CircularBuffer(cfg.frame_history_size, frame_shape,
                                           np.uint8)
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        return self.ale.game_over()

    def _get_single_frame(self):
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        expanded_frame = np.expand_dims(maxed_frame, 3)
        frame = self._frame_postprocess(expanded_frame)

        return frame

    def reset(self, inc_episode_count=True):
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1

        self.ale.reset_game()
        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        assert not self._is_terminal()

        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._frame_stack.append(self._get_single_frame())
        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        return self._episode_reward

    @property
    def episode_frames(self):
        return self._episode_frames

    @property
    def episode_steps(self):
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
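
A hedged usage sketch; cfg is the module-level config object the class reads throughout, so frame_shape must match cfg's preprocessing.

import numpy as np

env = AtariEnvironment(frame_shape=(84, 84, 1))
terminal = False
while not terminal:
    action = np.random.randint(len(env.action_set))
    reward, state, terminal = env.act(action)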
Example #37
class Agent(object):
    def __init__(self):
        self._ale = ALEInterface()
        self._ale.setInt('random_seed', 123)
        self._ale.setFloat('repeat_action_probability', 0.0)
        self._ale.setBool('color_averaging', False)
        self._ale.loadROM('roms/enduro.bin')
        self._controller = Controller(self._ale)
        self._extractor = StateExtractor(self._ale)
        self._image = None
        self._speed_range = 50

    def run(self, learn, episodes=1, draw=False):
        """ Implements the playing/learning loop.

        Args:
            learn(bool): Whether the self.learn() function should be called.
            episodes (int): The number of episodes to run the agent for.
            draw (bool): Whether to overlay the environment state on the frame.

        Returns:
            None
        """
        for e in range(episodes):
            self._relative_speed = -self._speed_range

            # Observe the environment to set the initial state
            (road, cars, grid, self._image) = self._extractor.run(draw=draw,
                                                                  scale=4.0)
            self.initialise(road, cars, self._relative_speed, grid)

            num_frames = self._ale.getFrameNumber()

            # Each episode lasts 6500 frames
            while self._ale.getFrameNumber() - num_frames < 6500:
                # Take an action
                self.act()

                # Update the environment grid
                (road, cars, grid,
                 self._image) = self._extractor.run(draw=draw, scale=4.0)

                if self.collision(cars):
                    self._relative_speed = -self._speed_range

                self.sense(road, cars, self._relative_speed, grid)

                # Perform learning if required
                if learn:
                    self.learn()

                self.callback(learn, e + 1,
                              self._ale.getFrameNumber() - num_frames)
            self._ale.reset_game()

    def collision(self, cars):
        if not cars['others']:
            return False

        x, y, _, _ = cars['self']

        min_dist = sys.float_info.max
        min_angle = 0.

        for c in cars['others']:
            cx, cy, _, _ = c
            dist = np.sqrt((cx - x)**2 + (cy - y)**2)
            if dist < min_dist:
                min_dist = dist
                min_angle = np.arctan2(y - cy, cx - x)

        return min_dist < 18. and 0.1 * np.pi < min_angle and min_angle < 0.9 * np.pi

    def getActionsSet(self):
        """ Returns the set of all possible actions
        """
        return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BRAKE]

    def move(self, action):
        """ Executes the action and advances the game to the next state.

        Args:
            action (int): The action which should be executed. Make sure to use
                          the constants returned by self.getActionsSet()

        Returns:
           int: The obtained reward after executing the action
        """

        if action == Action.ACCELERATE:
            self._relative_speed = min(self._relative_speed + 1,
                                       self._speed_range)
        elif action == Action.BRAKE:
            self._relative_speed = max(self._relative_speed - 1,
                                       -self._speed_range)

        return self._controller.move(action)

    def initialise(self, road, cars, speed, grid):
        """ Called at the beginning of each episode, mainly used
        for state initialisation. For more information on the arguments
        have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size
                  of the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid:  2-dimensional numpy array containing the latest grid
                   representation of the environment

        Returns:
            None
        """
        raise NotImplementedError

    def act(self):
        """ Called at each loop iteration to choose and execute an action.

        Returns:
            None
        """
        raise NotImplementedError

    def sense(self, road, cars, speed, grid):
        """ Called at each loop iteration to construct the new state from
        the update environment grid. For more information on the arguments
        have a look at the README.md

        Args:
            road: 2-dimensional array containing [x, y] points
                  in pixel coordinates of the road grid
            cars: dictionary which contains the location and the size
                  of the agent and the opponents in pixel coordinates
            speed: the relative speed of the agent with respect to the others
            grid: 2-dimensional numpy array containing the latest grid
                  representation of the environment
        Returns:
            None
        """
        raise NotImplementedError

    def learn(self):
        """ Called at each loop iteration when the agent is learning. It should
        implement the learning procedure.

        Returns:
            None
        """
        raise NotImplementedError

    def callback(self, learn, episode, iteration):
        """ Called at each loop iteration mainly for reporting purposes.

        Args:
            learn (bool): Indicates whether the agent is learning or not.
            episode (int): The number of the current episode.
            iteration (int): The number of the current iteration.

        Returns:
            None
        """

        raise NotImplementedError
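
A minimal concrete agent, sketched under the assumption that the abstract methods above form the complete required interface:

import random

class RandomAgent(Agent):
    def initialise(self, road, cars, speed, grid):
        pass

    def sense(self, road, cars, speed, grid):
        pass

    def act(self):
        # Choose uniformly among ACCELERATE, RIGHT, LEFT and BRAKE.
        self.move(random.choice(self.getActionsSet()))

    def learn(self):
        pass

    def callback(self, learn, episode, iteration):
        pass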
Example #38
SEE_SCREEN = (sys.argv[3] == 'set_screen')

# Generate future positions of Bert depending on current position and action
FUTURE_POS = gen_future_locs(BOX_LOCS)

# Learning hyperparameters
episodes = 400  # how many episodes to wait before moving the weights
max_time = 10000
gamma = 0.99  # discount factor for reward
lr = 1e-4
NUM_FEATURES = 31
weights = [rd.random() for _ in range(NUM_FEATURES)]
e = 0.15 if not USE_OPTIMISTIC_PRIOR else 0.00

# Initialize learning environment
ale = ALEInterface()
ale.setBool('sound', False)
ale.setBool('display_screen', SEE_SCREEN)
ale.setInt('frame_skip', 1)
ale.setInt('random_seed', SEED)
rd.seed(SEED)
ale.loadROM("qbert.bin")
ELPASED_FRAME = 0

# Possible positions of Bert in the RAM right before taking any action
MEM_POS = [[69, 77], [92, 77], [120, 65], [97, 65], [147, 53], [124, 53],
           [152, 41], [175, 41], [180, 29], [203, 29], [231, 16], [231, 41],
           [175, 65], [180, 53], [203, 53], [147, 77], [120, 93], [152, 65],
           [231, 65], [175, 93], [97, 93], [180, 77], [231, 93], [180, 105],
           [147, 105], [203, 77], [175, 77], [175, 117],
           [231, 117], [203, 129], [203, 105], [180, 129], [231, 141],
Example #39
class ALE(Environment):
    def __init__(self,
                 rom,
                 frame_skip=1,
                 reward_clipping=None,
                 repeat_action_probability=0.0,
                 loss_of_life_termination=False,
                 loss_of_life_reward=0,
                 display_screen=False,
                 seed=np.random.RandomState()):
        """
        Initialize ALE.

        Args:
            rom: Rom filename and directory.
            frame_skip: Repeat action for n frames. Default 1.
            reward_clipping: Clip rewards between (low, high). Can be None. Default None.
            repeat_action_probability: Repeats last action with given probability. Default 0.
            loss_of_life_termination: Signals a terminal state on loss of life. Default False.
            loss_of_life_reward: Reward/Penalty on loss of life (negative values are a penalty). Default 0.
            display_screen: Displays the emulator screen. Default False.
            seed: Random seed
        """

        self.ale = ALEInterface()
        self.rom = rom

        self.ale.setBool(b'display_screen', display_screen)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'frame_skip', frame_skip)

        # all set commands must be done before loading the ROM
        self.ale.loadROM(rom.encode())

        # setup gamescreen object
        width, height = self.ale.getScreenDims()
        self.gamescreen = np.empty((height, width, 3), dtype=np.uint8)

        self.frame_skip = frame_skip

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_reward = loss_of_life_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

        # reward clipping
        self.reward_clipping = reward_clipping

    def __str__(self):
        return 'ALE({})'.format(self.rom)

    def close(self):
        self.ale = None

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        # clear gamescreen
        self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        return self.current_state

    def execute(self, action):
        # convert action to ale action
        ale_action = self.action_inds[action]

        # get reward and process terminal & next state
        rew = self.ale.act(ale_action)
        if self.loss_of_life_termination or self.loss_of_life_reward != 0:
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                self.cur_lives = new_lives
                self.life_lost = True
                rew += self.loss_of_life_reward

        if self.reward_clipping is not None:
            rew = np.clip(rew, self.reward_clipping[0],
                          self.reward_clipping[1])
        terminal = self.is_terminal
        state_tp1 = self.current_state
        return state_tp1, rew, terminal

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def actions(self):
        return dict(continuous=False,
                    num_actions=len(self.action_inds),
                    names=self.action_names)

    @property
    def current_state(self):
        self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        return np.copy(self.gamescreen)

    @property
    def is_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        else:
            return self.ale.game_over()

    @property
    def action_names(self):
        action_names = [
            'No-Op', 'Fire', 'Up', 'Right', 'Left', 'Down', 'Up Right',
            'Up Left', 'Down Right', 'Down Left', 'Up Fire', 'Right Fire',
            'Left Fire', 'Down Fire', 'Up Right Fire', 'Up Left Fire',
            'Down Right Fire', 'Down Left Fire'
        ]
        return np.asarray(action_names)[self.action_inds]
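
A hedged rollout for this ALE wrapper; assumes a local breakout.bin.

import numpy as np

env = ALE('breakout.bin', frame_skip=4, reward_clipping=(-1, 1))
state = env.reset()
terminal = False
while not terminal:
    action = np.random.randint(len(env.action_inds))   # index into the minimal action set
    state, reward, terminal = env.execute(action)
env.close()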
Example #40
dataPath = "data/"

initialExplorationRate = 1.0
finalExplorationRate = 0.1
SEED = None
np.random.seed(SEED)
loadModel = False
saveData = False
saveModel = False
gamma = .99
learning_rate = 0.00025

display_screen = False
frameSkip = 4

ale = ALEInterface()
# ale.setInt('random_seed', 0)
ale.setInt("frame_skip", frameSkip)
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
    ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()
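
A hedged continuation: one random episode with the minimal action set obtained above.

from random import randrange

total_reward = 0
while not ale.game_over():
    total_reward += ale.act(legal_actions[randrange(len(legal_actions))])
print("Episode reward:", total_reward)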
Example #41
class ALE_Environment(EnvironmentBase):
    """
    Environment Specifications:
    Number of Actions = 18
    Original Frame Dimensions = 210 x 160
    Frame Dimensions = 84 x 84
    Frame Data Type = np.uint8
    Reward = Game Score

    Summary Name: frames_per_episode
    """
    def __init__(self,
                 config,
                 games_directory=None,
                 rom_filename=None,
                 summary=None):
        super().__init__()
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        display_screen              bool            False           Display game screen
        agent_render                bool            False           Display current frame the way the agent sees it
        frame_skip                  int             4               See ALE Documentation
        repeat_action_probability   float           0.25            in [0,1], see ALE Documentation
        max_num_frames              int             18000           Max number of frames per episode
        color_averaging             bool            False           If true, it averages over the skipped frames. 
                                                                    Otherwise, it takes the maximum over the skipped
                                                                    frames.
        frame_stack                 int             4               Stack of frames for agent, see Mnih et. al. (2015)
        save_summary                bool            False           Save the summary of the environment
        """

        assert isinstance(config, Config)
        self.display_screen = check_attribute_else_default(
            config, 'display_screen', False)
        self.agent_render = check_attribute_else_default(
            config, 'agent_render', False)
        self.frame_skip = check_attribute_else_default(config, 'frame_skip', 4)
        self.repeat_action_probability = check_attribute_else_default(
            config, 'repeat_action_probability', 0.25)
        max_num_frames = check_attribute_else_default(config, 'max_num_frames',
                                                      18000)
        self.color_averaging = check_attribute_else_default(
            config, 'color_averaging', True)
        if self.color_averaging:
            self.aggregate_func = np.average
        else:
            self.aggregate_func = np.amax
        self.frame_stack = check_attribute_else_default(
            config, 'frame_stack', 4)
        self.save_summary = check_attribute_else_default(
            config, 'save_summary', False)
        if self.save_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, "frames_per_episode", [])

        " Environment variables"
        self.env = ALEInterface()
        self.env.setInt(b'frame_skip', 1)
        self.env.setInt(b'random_seed', 0)
        self.env.setFloat(b'repeat_action_probability', 0)
        self.env.setInt(b"max_num_frames_per_episode", max_num_frames)
        self.env.setBool(b"color_averaging", False)
        self.env.setBool(b'display_screen', self.display_screen)
        self.rom_file = str.encode(games_directory + rom_filename)
        self.frame_count = 0
        " Loading ROM "
        self.env.loadROM(self.rom_file)
        """ Fixed Parameters:
        Frame Format: "NCHW" (batch_size, channels, height, width). Decided to adopt this format because
        it's the fastest to process in tensorflow with a gpu.
        Frame Height and Width: 84, the default value in the literature.
        """
        " Inner state of the environment "
        self.height = 84
        self.width = 84
        self.current_state = np.zeros(
            [self.frame_stack, self.height, self.width], dtype=np.uint8)
        self.original_height = 210
        self.original_width = 160
        self.history = np.zeros(
            [self.frame_skip, self.original_height, self.original_width],
            np.uint8)
        self.reset()

        self.observations_dimensions = self.current_state.shape
        self.frame_dims = self.current_state[0].shape
        self.actions = self.env.getLegalActionSet()
        self.previous_action = 0

    def reset(self):
        if self.save_summary and (self.frame_count != 0):
            self.summary['frames_per_episode'].append(self.frame_count)
        self.env.reset_game()
        self.frame_count = 0
        original_frame = np.squeeze(self.env.getScreenGrayscale())
        self.history[-1] = original_frame
        fixed_state = self.fix_state()
        self.current_state[-1] = fixed_state
        self.previous_action = 0
        # self.agent_state_display()    # For debugging purposes

    def add_frame(self, frame):
        self.current_state[:-1] = self.current_state[1:]
        self.current_state[-1] = frame

    def update(self, action):
        reward = 0
        for _ in range(self.frame_skip):
            if not self.env.game_over():
                # Sticky actions (Machado et al., 2018): with probability
                # repeat_action_probability, repeat the previous action
                # instead of the one the agent selected.
                p = np.random.rand()
                current_action = self.previous_action if p <= self.repeat_action_probability else action
                reward += self.env.act(current_action)
                self.history[:-1] = self.history[1:]
                self.history[-1] = np.squeeze(self.env.getScreenGrayscale())
                self.frame_count += 1
        new_frame = self.fix_state()
        self.add_frame(new_frame)
        terminal = self.env.game_over()
        self.previous_action = action
        # self.agent_state_display()    # For debugging purposes only
        return self.current_state, reward, terminal

    def fix_state(self):
        # Pool over the frame-skip history (average or max), then resize the
        # result to 84x84 and cast back to uint8.
        agg_state = self.aggregate_func(self.history, axis=0)
        fixed_agg_state = resize(agg_state, (self.height, self.width),
                                 mode='constant',
                                 preserve_range=True)
        fixed_agg_state = np.array(fixed_agg_state, dtype=np.uint8)
        return fixed_agg_state

    def agent_state_display(self):
        if self.agent_render:
            state = self.current_state[-1]
            plt.imshow(state)
            plt.pause(0.05)

    " Getters "

    def get_current_state(self):
        return self.current_state

    def get_state_for_er_buffer(self):
        return self.current_state[-1]

    def get_num_actions(self):
        # Size of the full Atari 2600 legal action set (see getLegalActionSet)
        return 18

    " Setters "

    def set_render(self, display_screen=False):
        self.env.setBool(b'display_screen', display_screen)
        # The setting only takes effect after reloading the ROM, which also
        # resets the emulator.
        self.env.loadROM(self.rom_file)
Example #43
import sys
import os
from random import randrange
from ale_python_interface import ALEInterface
import numpy as np
sys.path.append(os.path.abspath('../lib'))
from ImgProc.PongProcessing import PongProcessing as proc
from Autoencoder.Encoder import Encoder
import matplotlib.pyplot as plt

processor = proc()
encoder = Encoder(path_to_model='encoder_v2_model.json',
                  path_to_weights='encoder_v2_weights.h5')

ale = ALEInterface()

# Get & Set the desired settings
ale.setInt('random_seed', 123)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
Example #44
class Environment:

    def __init__(self, render=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))
        # Buffer sized to this ROM's native screen resolution; ALE screen
        # height varies by game (250x160 here rather than the common 210x160).
        self._screen = np.empty((250, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

        self.img_buffer = []

    def set_render(self, render):
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        self.img_buffer = []
        self.img_buffer.append(self.ale.getScreenRGB())

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (250, 160))
        screen = cv2.resize(screen, (84, 90))
        screen = screen[5:89, :]
        screen = screen.astype(np.float32)
        screen /= 255.0

        self.frame_buffer = np.stack((screen, screen, screen, screen), axis=2)
        return self.frame_buffer

    def process(self, action, gif=False):

        # Offset by 1 so the agent's action indices skip ALE action 0 (no-op)
        reward = self.ale.act(1 + action)
        done = self.ale.game_over()

        if gif:
            self.img_buffer.append(self.ale.getScreenRGB())

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (250, 160))
        screen = cv2.resize(screen, (84, 90))
        screen = np.reshape(screen[5:89, :], (84, 84, 1))
        screen = screen.astype(np.float32)
        screen *= (1/255.0)

        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:],
                                      screen, axis=2)

        return self.frame_buffer, reward, done, ""

    def save_gif(self, path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        imageio.mimsave(path, self.img_buffer, duration=0.001)
        self.img_buffer = []

    def close(self):
        self.ale.setBool(b'display_screen', False)
Example #45
def ale_load_from_rom(rom_path, display_screen):
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        raise ImportError('Unable to import the python package of Arcade Learning Environment. '
                          'ALE may not have been installed correctly. Refer to '
                          '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some '
                          'installation guidance') from e

    ale = ALEInterface()
    ale.setInt(b'random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        ale.setBool(b'display_screen', True)
    else:
        ale.setBool(b'display_screen', False)
    ale.setFloat(b'repeat_action_probability', 0)
    ale.loadROM(str.encode(rom_path))
    return ale
Example #46
class AtariPlayer(RLEnvironment):
    """
    A wrapper for the Atari emulator.
    Will automatically restart when a real episode ends (isOver may indicate
    only the loss of a life rather than game over).
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 height_range=(None, None),
                 frame_skip=4,
                 image_shape=(84, 84),
                 nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: treat the loss of a life as end of episode. Useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype(
            'float32')
        # luma weights 0.299, 0.587, 0.114; same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        return ret.astype('uint8')  # to save some memory

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def finish_episode(self):
        self.stats['score'].append(self.current_episode_score.sum)

    def restart_episode(self):
        self.current_episode_score.reset()
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        if isOver:
            self.finish_episode()
        if self.ale.game_over():
            self.restart_episode()
        return (r, isOver)
Example #47
    def __init__(self,
                 rom_file,
                 viz=0,
                 frame_skip=4,
                 nullop_start=30,
                 live_lost_as_eoe=True,
                 max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_lost_as_eoe: treat the loss of a life as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.height, self.width),
                                            dtype=np.uint8)
        self._restart_episode()
Example #48
        session.run(targetNet.b2.assign(trainNet.b2))
        session.run(targetNet.b3.assign(trainNet.b3))
        session.run(targetNet.b4.assign(trainNet.b4))


# def printDict(dict):
#     print 'Options:\n'
#     for i in dict.keys():
#         print " ",i,"=",dict[i]
#
#     print ''

# initialization
np.random.seed(SEED)

ale = ALEInterface()
if SEED is None:
    ale.setInt('random_seed', 0)
else:
    ale.setInt('random_seed', SEED)
ale.setInt("frame_skip", frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.setFloat("repeat_action_probability", 0.0)
ale.loadROM(romPath)
legal_actions = ale.getMinimalActionSet()
n_actions = len(legal_actions)
opt.n_actions = n_actions

explorationRateDelta = (initialExplorationRate - finalExplorationRate) / (
Example #49
class AtariPlayer(gym.Env):
    """
    A wrapper for the ALE emulator, with configuration to mimic DeepMind's DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is over
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 frame_skip=4,
                 nullop_start=30,
                 live_lost_as_eoe=True,
                 max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
                live_lost_as_eoe: treat the loss of a life as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.height, self.width),
                                            dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # luma weights 0.299, 0.587, 0.114; same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
Example #50
initialExplorationRate = 1.0
finalExplorationRate = 0.1
SEED = 123
np.random.seed(SEED)
loadModel = -1
loadModelPath = "model/" + "%02d" % loadModel + ".tfmodel"
saveData = False
saveModel = True
gamma = .99
learning_rate = 0.00025

display_screen = False
frameSkip = 4

ale = ALEInterface()
ale.setInt('random_seed', SEED)
ale.setInt("frame_skip", frameSkip)
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
    ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()
Example #51
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_lost_as_eoe: treat the loss of a life as end of episode. Useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()