Esempio n. 1
0
class ALE(object):
    def __init__(self, init_seed, init_rand):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0) 
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4

        self.screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    def setSetting(self, action_repeat, screen_type):
        self.action_repeat = action_repeat
        self.screen_type = screen_type

    def _step(self, action):
        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()

        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()


    def state(self):
        return self.reward, self.screen, self.terminal

    def act(self, action):
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward
        return self.state()

    def new_game(self):
        if self.ale.game_over():
            self.ale.reset_game()

            if self.screen_type == 0:
                self.screen = self.ale.getScreenRGB()
            elif self.screen_type == 1:
                self.screen = self.ale.getScreenGrayscale()
            else:
                sys.stderr.write('screen_type error!')
                exit()

        for _ in range(self.init_rand):
            self._step(0)

        return self.screen
class emulator:
	def __init__(self, rom_name, vis):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_mum_frames_per_episode")
		self.ale.setInt("random_seed", 123)
		self.ale.setInt("frame_skip", 4)
		self.ale.loadROM('roms/' + rom_name)
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		
		print self.legal_actions
		self.screen_width, self.screen_height = self.ale.getScreenDims()
		print("width/height: "+ str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis:
			cv2.startWindowThread()
			cv2.namedWindow("preview")
			
	def get_image(self):
		# numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		# self.ale.getScreenRGB(numpy_surface)
		# image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		image = self.ale.getScreenRGB()
		image = np.reshape(image, (self.screen_height, self.screen_width, 3))
		return image
	
	def newGame(self):
		self.ale.reset_game()
		return self.get_image(), 0, False
	
	def next(self, action_indx):
		reward = self.ale.act(action_indx)
		nextstate = self.get_image()
		if self.vis:
			cv2.imshow('preview', nextstate)
		return nextstate, reward, self.ale.game_over()
	
	def train(self):
		for episode in range(10):
			total_reward = 0
			frame_number = 0
			while not self.ale.game_over():
				a = self.legal_actions[random.randrange(len(self.legal_actions))]
				# Apply an action and get the resulting reward
				reward = self.ale.act(a);
				total_reward += reward
				screen = self.ale.getScreenRGB()
				screen = np.array(screen).reshape([self.screen_height, self.screen_width, -1])
				frame_number = self.ale.getEpisodeFrameNumber()
				cv2.imshow("screen", screen/255.0)
				cv2.waitKey(0)
				
			self.ale.saveScreenPNG("test_"+str(frame_number)+".png")
			print('Episode %d ended with score: %d' % (episode, total_reward))
			print('Frame number is : ', frame_number)
			self.ale.reset_game()
Esempio n. 3
0
class AtariMDP(MDP, Serializable):
    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)

        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip

    def get_image(self):
        return to_rgb(self.ale)

    def get_ram(self):
        return to_ram(self.ale)

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()

    @property
    def n_actions(self):
        return len(self.action_set)

    def get_obs(self):
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None, :]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None, :, :, :]

    def step(self, a):

        reward = 0.0
        action = self.action_set[a]
        for _ in xrange(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1, -1)
        return ob, np.array([reward]), self.ale.game_over()

    # return: (states, observations)
    def reset(self):
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        return self._action_set

    def plot(self):
        import cv2
        cv2.imshow("atarigame", self.get_image())  #pylint: disable=E1101
        cv2.waitKey(10)  #pylint: disable=E1101
Esempio n. 4
0
class AtariMDP(MDP, Serializable):

    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)
        
        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)        
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip


    def get_image(self):
        return to_rgb(self.ale)
    def get_ram(self):
        return to_ram(self.ale)
    def game_over(self):
        return self.ale.game_over()
    def reset_game(self):
        return self.ale.reset_game()

    @property
    def n_actions(self):
        return len(self.action_set)

    def get_obs(self):
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None,:]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None,:,:,:]

    def step(self, a):

        reward = 0.0
        action = self.action_set[a]
        for _ in xrange(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1,-1)
        return ob, np.array([reward]), self.ale.game_over()

    # return: (states, observations)
    def reset(self):
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        return self._action_set

    def plot(self):
        import cv2
        cv2.imshow("atarigame",self.get_image()) #pylint: disable=E1101
        cv2.waitKey(10) #pylint: disable=E1101
Esempio n. 5
0
class Atari:
    # Constructor
    def __init__(self, rom_name):
        # 1º Passo: carregamos o jogo e definimos seus parâmetros
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            b"max_num_frames_per_episode")
        self.ale.setInt(b"random_seed", 123)
        self.ale.setInt(b"frame_skip", 4)
        self.ale.loadROM(('game/' + rom_name).encode())

        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()

        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # 2º Passo: criamos a janela para exibição
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    # Essa função será utilizada para receber uma imagem do emulador, já em um formato esperado
    # por nosso algoritmo de treinamento.
    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    # Simplesmente inicializa o jogo
    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    # Essa função será responsável por retornar as informações da observação do estado após certa ação ser tomada.
    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()

        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()

        return nextstate, reward, self.ale.game_over()
class env_atari:
    def __init__(self, params):
        self.params = params
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(0, 500))
        self.ale.setFloat('repeat_action_probability', params['repeat_prob'])
        self.ale.setInt(b'frame_skip', params['frameskip'])
        self.ale.setBool('color_averaging', True)
        self.ale.loadROM('roms/' + params['rom'] + '.bin')
        self.actions = self.ale.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        self.screen_width, self.screen_height = self.ale.getScreenDims()

    def reset(self):
        self.ale.reset_game()
        seed = np.random.randint(0, 7)
        for i in range(seed):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        reward = self.ale.act(self.actions[action])
        next_s = self.get_image()
        terminate = self.ale.game_over()
        return next_s, reward, float(terminate), 0

    def get_image(self):
        temp = np.zeros(self.screen_height * self.screen_width * 3,
                        dtype=np.uint8)
        self.ale.getScreenRGB(temp)
        #self.ale.getScreenGrayscale(temp)
        return temp.reshape((self.screen_height, self.screen_width, 3))
class Emulate:
  def __init__(self, rom_file, display_screen=False,frame_skip=4,screen_height=84,screen_width=84,repeat_action_probability=0,color_averaging=True,random_seed=0,record_screen_path='screen_pics',record_sound_filename=None,minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width,screen_height)

  def numActions(self):
    return len(self.actions)

  def getActions(self):
  	return self.actions

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def getScreenGray(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def getScreenColor(self):
    screen = self.ale.getScreenRGB()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def isTerminal(self):
    return self.ale.game_over()
Esempio n. 8
0
class Emulator(object):
    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        self.ale = ALEInterface()
        self.max_num_frames_per_episode = 100000 #self.ale.getInt('max_num_frames_per_episode')
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        return self.ale.act(action)

    def terminal(self):
        return self.ale.game_over()
class pyrlcade_environment(object):
    def init(self,rom_file,ale_frame_skip):

        self.ale = ALEInterface()

        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
        self.ale.set("random_seed",123)
        self.ale.set("disable_color_averaging",1)
        self.ale.set("frame_skip",ale_frame_skip)

        self.ale.loadROM(rom_file)
        self.legal_actions = self.ale.getMinimalActionSet()
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size),dtype=np.uint8)
        self.ale.getRAM(self.ram)

        self.state = self.ale.getRAM(self.ram)

    def reset_state(self):
        self.ale.reset_game()

    def set_action(self,a):
        self.action = a

    def step(self):
        self.reward = self.ale.act(self.action)
        is_terminal = self.ale.game_over()
        return is_terminal

    def get_state(self):
        self.ale.getRAM(self.ram)
        return self.ram

    def get_reward(self):
        return self.reward
Esempio n. 10
0
class Environment:
  def __init__(self, rom_file, args):
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.dims = (args.screen_height, args.screen_width)

  def numActions(self):
    return len(self.actions)

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def isTerminal(self):
    return self.ale.game_over()
Esempio n. 11
0
class Emulator(object):
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.height, self.width),
                interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        return self.ale.act(self.actions[action])

    def terminal(self):
        return self.ale.game_over()
Esempio n. 12
0
def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')

    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
Esempio n. 13
0
class ALEGame(object):
    """
    Class linked to the Arcade Learning Environment
    """
    def __init__(self, rand_seed, game_name):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', rand_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', SKIPED_FRAMES)
        self.ale.loadROM(game_name.encode('ascii'))

        self.real_actions = self.ale.getMinimalActionSet()
        self.screen = np.empty((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
        self.reset()

    def preprocess_image(self, is_to_reshape=False):
        """
        Get image from the game and reshape it
        """
        self.ale.getScreenGrayscale(self.screen)
        reshaped_screen = np.reshape(self.screen, (IMAGE_HEIGHT, IMAGE_WIDTH))
        x_t = skimage.transform.resize(reshaped_screen, (110, 84),
                                       preserve_range=True)

        x_t = x_t[18:102, :]

        if is_to_reshape:
            x_t = np.reshape(x_t, (84, 84, 1))

        x_t = x_t.astype(np.float32)
        x_t *= (1.0 / 255.0)
        return x_t

    def reset(self):
        """
        Resets the game and create the first state
        """
        self.ale.reset_game()
        self.act(0)
        x_t = self.preprocess_image()
        self.s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    def act(self, action):
        self.reward = self.ale.act(action)
        self.is_game_over = self.ale.game_over()

    def process_to_next_image(self, action):
        """
        Acts and get new state        
        """
        real_action = self.real_actions[action]
        self.act(real_action)
        x_t1 = self.preprocess_image(True)
        self.s_t1 = np.append(self.s_t[:, :, 1:], x_t1, axis=2)

    def update(self):
        self.s_t = self.s_t1
Esempio n. 14
0
 def act_with_frame_skip(self, a): # trolololo
     reward = 0
     game_over = False
     lives = ALEInterface.lives(self)
     for _ in xrange(self.frame_skip):
         reward += ALEInterface.act(self, self.legal_actions[a])
         if ALEInterface.game_over(self) or (not self.test_mode and ALEInterface.lives(self) < lives):
             game_over = True
     return reward, game_over
Esempio n. 15
0
class AtariEnvironment(Environment):
    """
    Atari Environment Object
    """
    def __init__(self,
                 rom_path,
                 action_repeat=4,
                 death_end=True,
                 width_resize=84,
                 height_resize=84,
                 resize_mod='scale'):
        super(Environment, self).__init__()
        self.action_repeat = action_repeat
        self.death_end = death_end
        self.width_resize = width_resize
        self.height_resize = height_resize
        self.resize_mod = resize_mod
        self.display = False

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)
        self.ale.setInt('random_seed', np.random.randint(1000))
        self.ale.setBool('display_screen', self.display)
        self.action_set = self.ale.getMinimalActionSet()
        self.num_actions = len(self.action_set)
        self.start_lives = self.ale.lives()
        width, height = self.ale.getScreenDims()
        self.currentScreen = np.empty((height, width), dtype=np.uint8)
        self.reset()

    def reset(self):
        self.ale.reset_game()
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = False

    def step(self, action, repeat=None):
        repeat = self.action_repeat if repeat is None else repeat
        reward = 0
        for _ in range(repeat):
            reward += self.ale.act(self.action_set[action])
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = self.death_end and self.ale.lives(
        ) < self.start_lives or self.ale.game_over()
        return reward

    def get_frame(self):
        if self.resize_mod == 'scale':
            return imresize(self.currentScreen,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
        elif self.resize_mod == 'crop':
            height, width = self.currentScreen.shape
            res = (height - width) / 2
            crop = self.currentScreen[res:(res + width), :]
            return imresize(crop, (self.width_resize, self.height_resize),
                            interp='bilinear')
Esempio n. 16
0
class Emulator:
    def __init__(self):
    
        self.ale = ALEInterface()
        
        # turn off the sound
        self.ale.setBool('sound', False)
        
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)


        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
Esempio n. 17
0
class Env():
    def __init__(self, rom_name):
        self.__initALE()
        self.__loadROM(rom_name)
        self.screen_history = []
        self.screens = []

    def __initALE(self):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', randrange(1000))
        self.ale.setInt(b'fragsize', 64)
        self.ale.setInt(b'frame_skip', 1)

        # qq set this back to 0.25?
        self.ale.setFloat(b'repeat_action_probability', 0)
        self.ale.setLoggerMode('error')

    def __loadROM(self, rom_name):
        self.ale.loadROM(rom_name.encode('utf-8'))
        self.actions = self.ale.getMinimalActionSet()

        (width, height) = self.ale.getScreenDims()
        self.screen_data1 = np.empty((height, width, 3), dtype=np.uint8)
        self.screen_data2 = np.empty((height, width, 3), dtype=np.uint8)

    def get_legal_action_count(self):
        return len(self.actions)

    def act(self, action_index):
        action = self.actions[action_index]
        reward = 0

        # perform the action 4 times
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data1)
        reward += _clip(self.ale.act(action), -1, 1)
        self.ale.getScreenRGB(self.screen_data2)

        # return the pixel-wise max of the last two frames (some games only
        # render every other frame)
        screen_data_combined = np.maximum(self.screen_data1, self.screen_data2)
        terminal = self.ale.game_over()

        self.screens.append(preprocess_screen(screen_data_combined))
        phi = get_phi(self.screens)

        return (terminal, reward, phi, self.screen_data2)

    def get_s(self):
        return get_phi(self.screens)

    def reset(self):
        self.ale.reset_game()
        self.screens = []
Esempio n. 18
0
class Emulator:
    def __init__(self):

        self.ale = ALEInterface()

        # turn off the sound
        self.ale.setBool('sound', False)

        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability',
                          REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)

        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()
Esempio n. 19
0
class AtariEnvironment:
    def __init__(self, rom):
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM(rom_file=rom)
        self.action_space = self.ale.getMinimalActionSet()
        self.obs = self.reset()

        try:
            self.im = Image.fromarray(self.obs)
            self.root = Tk()
            self.tkim = ImageTk.PhotoImage(self.im)
            self.window = Label(image=self.tkim)
            self.window.image = self.tkim
            self.window.pack()

        except AttributeError:
            print("Cannot create rendering attributes")

    def step(self, action):
        reward = 0.

        # Use if you want environment to provide every 4th frame and repeat action in between
        for i in range(4):
            reward += float(self.ale.act(self.action_space[action]))
            if i == 2:
                frame1 = self.ale.getScreenGrayscale()
            if i == 3:
                frame2 = self.ale.getScreenGrayscale()

        self.obs = np.squeeze(np.maximum(frame1, frame2))

        # Use if you want to receive every frame from environment
        # reward += float(self.ale.act(self.action_space[action]))
        # self.obs = np.squeeze(self.ale.getScreenGrayscale())

        done = self.ale.game_over()
        return self.obs, reward, done

    def reset(self):
        self.ale.reset_game()
        self.obs = np.squeeze(self.ale.getScreenGrayscale())
        return self.obs

    def render(self, rate=0.1):
        self.im = Image.fromarray(self.obs)
        self.tkim = ImageTk.PhotoImage(self.im)
        self.window.configure(image=self.tkim)
        self.window.image = self.tkim
        self.window.update_idletasks()
        self.window.update()
        time.sleep(rate)

    def sample_action(self):
        action = random.choice([0, 1, 2, 3])
        return action
Esempio n. 20
0
class Atari:
    def __init__(self, rom_name):

        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('game/' + rom_name)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print len(self.legal_actions)
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def get_image(self):

        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):

        self.ale.reset_game()
        return self.get_image()

    def next(self, action):

        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()

        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        # print "reward %d" % reward
        return nextstate, reward, self.ale.game_over()
Esempio n. 21
0
class Environment:
    def __init__(self, render=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

    def set_render(self, render):
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = screen[18:102, :]
        screen = screen.astype(np.float32)
        screen /= 255.0

        self.frame_buffer = np.stack((screen, screen, screen, screen), axis=2)
        return self.frame_buffer

    def act(self, action):

        reward = self.ale.act(4 + action)
        done = self.ale.game_over()

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = np.reshape(screen[18:102, :], (84, 84, 1))
        screen = screen.astype(np.float32)
        screen *= (1 / 255.0)

        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:],
                                      screen,
                                      axis=2)

        return self.frame_buffer, reward, done, ""

    def close(self):
        self.ale.setBool(b'display_screen', False)
Esempio n. 22
0
class Atari:
    def __init__(self,rom_name):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed",123)
        self.ale.setInt("frame_skip",4)
        self.ale.loadROM(rom_name)
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        print len(self.legal_actions)
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def preprocess(self, image):
        image = cv2.cvtColor(cv2.resize(image, (84, 110)), cv2.COLOR_BGR2GRAY)
        image = image[26:110,:]
        ret, image = cv2.threshold(image,1,255,cv2.THRESH_BINARY)
        return np.reshape(image,(84,84, 1))

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        return self.preprocess(image)

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])    
        nextstate = self.get_image()

        cv2.imshow(self.windowname,nextstate)
        if self.ale.game_over():
            self.newGame()
        #print "reward %d" % reward 
        return nextstate, reward, self.ale.game_over()
Esempio n. 23
0
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1),
                                    dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        self.actions = [3, 0, 1, 4]  # noop, left, right, fire,

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns Grayscale thresholded resized screen image """
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        self.init_frame_number = 0

        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        # Instead of resetting the game, we load a checkpoint and start from there.
        # self.ale.reset_game()
        self.ale.restoreState(
            self.ale.decodeState(checkpoints[random.randint(
                0, 99)].astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        return self.ale.getFrameNumber() - self.init_frame_number
Esempio n. 25
0
class emulator(object):
    def __init__(self, rom_name, vis, frameskip=1, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", frameskip)
        romfile = str(ROM_PATH) + str(rom_name)
        if not os.path.exists(romfile):
            print('No ROM file found at "' + romfile +
                  '".\nAdjust ROM_PATH or double-check the filt exists.')
        self.ale.loadROM(romfile)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(
                self.windowname,
                flags=cv2.WINDOW_AUTOSIZE)  # permit manual resizing

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
            if sys.platform == 'darwin':
                # if we don't do this, can hang on OS X
                cv2.waitKey(2)
        return nextstate, reward, self.ale.game_over()
Esempio n. 26
0
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        self.actions = [3, 0, 1, 4]  # noop, left, right, fire,

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns Grayscale thresholded resized screen image """
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
Esempio n. 27
0
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability, rng):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.rng_source = rng
        self.rng = deepcopy(self.rng_source)
        self.ale = ALEInterface()
        '''
		This sets the probability from the default 0.25 to 0.
		It ensures deterministic actions.
		'''
        self.ale.setFloat('repeat_action_probability', 0.0)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def reset_action_seed(self):
        self.rng = deepcopy(self.rng_source)

    def set_action_seed(self, seed):
        self.rng = np.random.RandomState(seed)

    def act(self, action):
        actual_action = action
        if self.internal_action_repeat_prob > 0:
            if self.rng.uniform(0, 1) < self.internal_action_repeat_prob:
                actual_action = self.prev_action
        self.prev_action = actual_action
        return self.ale.act(actual_action)
Esempio n. 28
0
class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #
        self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()


    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()
class Atari:
	def __init__(self,rom_name):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('./' +rom_name)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		#print len(self.legal_actions)
		self.windowname = rom_name
		#cv2.startWindowThread()
		#cv2.namedWindow(rom_name)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action):
		reward = self.ale.act(self.legal_actions[np.argmax(action)])	
		nextstate = self.get_image()
		
		#cv2.imshow(self.windowname,nextstate)
		if self.ale.game_over():
			self.newGame()
		#print "reward %d" % reward 
		return nextstate, reward, self.ale.game_over()
class emulator:
	def __init__(self, rom_name, vis,windowname='preview'):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		self.windowname = windowname
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		self.init_frame_number = 0

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow(self.windowname)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		# Instead of resetting the game, we load a checkpoint and start from there.
		# self.ale.reset_game()
		self.ale.restoreState(self.ale.decodeState(checkpoints[random.randint(0,99)].astype('uint8')))
		self.init_frame_number = self.ale.getFrameNumber()
		#self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow(self.windowname,nextstate)
		return nextstate, reward, self.ale.game_over()

	def get_frame_number(self):
		return self.ale.getFrameNumber() - self.init_frame_number
Esempio n. 31
0
class Emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        #print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        #added by ben may 2016
        print image
        print '&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& printing'
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png',nextstate)
        # scipy.misc.imsave('test.png',nextstate)
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
Esempio n. 32
0
def train():
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.loadROM('roms/breakout.bin')
    legal_actions = ale.getLegalActionSet()
    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        reward = ale.act(a)
        screen = None
        screen = ale.getScreenRGB()
        print(screen)
        plt.imshow(screen)
        plt.show()

        total_reward += reward
        print(total_reward)
    print('Episode end!')
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/T-Z/Tennis.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = True
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)

        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Complete loading ROM')

    legal_actions = ale.getMinimalActionSet()

    # Play 10 episodes
    for episode in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[np.random.randint(legal_actions.size)]
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
Esempio n. 34
0
class emulator:
	def __init__(self, rom_name, vis, windowname='preview'):
  		self.ale = ALEInterface()
		# When it starts
  		self.ale.setInt("random_seed", 123)
		# Skipping 4 frames 
  		self.ale.setInt("frame_skip", 4)
  		self.ale.loadROM('roms/' + rom_name)
  		self.legal_actions = self.ale.getMinimalActionSet()
  		print('Actions : %s' % self.legal_actions)
  		self.action_map = dict()
  		self.windowname = windowname
  		# Raw atari frames, 210 * 160 pixel images 
  		self.screen_width, self.screen_height = self.ale.getScreenDims()
  		print("widht/height: " + str(self.screen_width) + "/" + str(self.screen_height))
  		# Visualize
  		self.vis = vis
  		if vis:
   			cv2.startWindowThread()
   			cv2.namedWindow(self.windowname)

 	def get_image(self):
  		# Need to specify data type as uint8
  		numpy_surface = np.zeros([self.screen_width * self.screen_height * 3], dtype=np.uint8)
  		# get RGB values
  		self.ale.getScreenRGB(numpy_surface)
  		image = np.reshape(numpy_surface, [self.screen_height, self.screen_width, 3])
  		return image

 	def new_game(self):
  		self.ale.reset_game()
  		# Reset game and getting reset image value
  		return self.get_image()

 	def next(self, action_index):
  		# Get R(s,a)
  		reward = self.ale.act(action_index)
  		# Get image pixel value after taking an action
  		next_state = self.get_image()
  		if self.vis:
   			cv2.imshow(self.windowname, next_state)
  		# self.ale.game_over() returns True when game is over
  		return next_state, reward, self.ale.game_over()
Esempio n. 35
0
class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in xrange(self.steps_between_actions))

        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        return self.ale.game_over() or self.ale.lives() < 5
Esempio n. 36
0
class game(object):
    def __init__(self, display):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compilied
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = display
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

    def act(self, action):
        return self.ale.act(action)

    def getState(self):
        return get_feature(self.ale.getScreen())

    def getScreen(self):
        return self.ale.getScreen()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def game_over(self):
        return self.ale.game_over()
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.ale = ALEInterface()
        '''
		This sets the probability from the default 0.25 to 0.
		It ensures deterministic actions.
		'''
        self.ale.setFloat('repeat_action_probability',
                          repeat_action_probability)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def act(self, action):
        actual_action = action
        return self.ale.act(actual_action)
Esempio n. 38
0
        class Env:
            def __init__(self):
                self.ale = ALEInterface()
                rom_name = "roms/Breakout.bin"
                self.ale.setInt("frame_skip", 4)
                self.ale.loadROM(rom_name)
                legal_actions = self.ale.getMinimalActionSet()
                self.action_map = {}
                for i in range(len(legal_actions)):
                    self.action_map[i] = legal_actions[i]
                self.action_num = len(self.action_map)

            def reset(self):
                state = np.zeros((84, 84, 3), dtype=np.uint8)
                self.ale.reset_game()
                return state

            def step(self, action):
                reward = self.ale.act(self.action_map[action])
                state = self.ale.getScreenRGB()
                done = self.ale.game_over()
                return state, reward, done, ""
Esempio n. 39
0
class emulator:
	def __init__(self, rom_name, vis):
		if vis:
			import cv2
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow("preview")

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow('preview',nextstate)
		return nextstate, reward, self.ale.game_over()
Esempio n. 40
0
class Game():
    """
    Wrapper around the ALEInterface class.
    """

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False) # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()
def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    recordings_dir = './recordings/breakout/'

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
            ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 5 episodes
    rewards = []
    for episode in xrange(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a);
            total_reward += reward
        rewards.append(total_reward)
        #print 'Episode', episode, 'ended with score:', total_reward
        ale.reset_game()
    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
Esempio n. 42
0
class pyrlcade_environment(object):
    def init(self, rom_file, ale_frame_skip):

        self.ale = ALEInterface()

        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.set("random_seed", 123)
        self.ale.set("disable_color_averaging", 1)
        self.ale.set("frame_skip", ale_frame_skip)

        self.ale.loadROM(rom_file)
        self.legal_actions = self.ale.getMinimalActionSet()
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size), dtype=np.uint8)
        self.ale.getRAM(self.ram)

        self.state = self.ale.getRAM(self.ram)

    def reset_state(self):
        self.ale.reset_game()

    def set_action(self, a):
        self.action = a

    def step(self):
        self.reward = self.ale.act(self.action)
        is_terminal = self.ale.game_over()
        return is_terminal

    def get_state(self):
        self.ale.getRAM(self.ram)
        return self.ram

    def get_reward(self):
        return self.reward
Esempio n. 43
0
def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    recordings_dir = './recordings/breakout/'

    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
            ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 5 episodes
    rewards = []
    for episode in xrange(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a)
            total_reward += reward
        rewards.append(total_reward)
        #print 'Episode', episode, 'ended with score:', total_reward
        ale.reset_game()
    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
Esempio n. 44
0
 def act(self, a):
     lives = ALEInterface.lives(self)
     reward = ALEInterface.act(self, self.legal_actions[a])
     game_over = ALEInterface.game_over(self) or (not self.test_mode and ALEInterface.lives(self) < lives)
     return reward, game_over
Esempio n. 45
0
#     ale.setBool('display_screen', True)

# Load the ROM file
ale.loadROM('Breakout.bin')

# Get the list of legal actions
# legal_actions = ale.getLegalActionSet()
legal_actions = ale.getMinimalActionSet()
print legal_actions

# (screen_width,screen_height) = ale.getScreenDims()
# screen_data = np.zeros(screen_width*screen_height,dtype=np.uint32)
# ale.getScreenRGB(screen_data)

(screen_width, screen_height) = ale.getScreenDims()
screen_data = np.zeros(screen_width * screen_height, dtype=np.uint8)
print type(ale.getScreen(screen_data))

# Play 10 episodes
for episode in xrange(10):

    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        # Apply an action and get the resulting reward
        reward = ale.act(a)
        print  reward
        total_reward += reward
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()
Esempio n. 46
0
class AleAgent:
    ##
    # @param processing_cls Class for processing game visual unput
    def __init__(self, processing_cls, game_rom=None, encoder_model=None, encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model, path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compilied
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)   # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)   # no sound

            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )

    ##
    # Initialize the reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in xrange(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print 'Starting episode: ', episode+1

            if key_binding:
                eps = 0.05
            else:
                eps -= 2/num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)
            while not self.game.game_over():
                current_state = next_state
                x = None

                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]
                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)
                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print 'Epsilon: ', eps
            print 'Episode', episode+1, 'ended with score:', total_reward
            print 'Hits: ', hits
            self.game.reset_game()
            self.NFQ.train()
            hits = 0
            moves = 0
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    def play(self):
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)

            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1

        print 'The game ended with score:', total_reward, ' after: ', moves, ' moves'
Esempio n. 47
0
    font = pygame.font.SysFont("Ubuntu Mono",30)
    text = font.render("Total Reward: " + str(total_reward) ,1,(208,255,255))
    screen.blit(text,(330,line_pos))

    pygame.display.flip()

    #process pygame event queue
    exit=False
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            exit=True
            break;
    if(pressed[pygame.K_q]):
        exit = True
    if(exit):
        logger.close();
        break

    #delay to 60fps
    clock.tick(60.)

    if(ale.game_over()):
        episode_frame_number = ale.getEpisodeFrameNumber()
        frame_number = ale.getFrameNumber()
        print("Frame Number: " + str(frame_number) + " Episode Frame Number: " + str(episode_frame_number))
        print("Episode " + str(episode) + " ended with score: " + str(total_reward))
        ale.reset_game()
        total_reward = 0.0 
        episode = episode + 1

Esempio n. 48
0
class AtariEmulator(BaseEnvironment):
    def __init__(self,
                 rom_addr,
                 random_start=False,
                 random_seed=6,
                 visualize=True,
                 single_life=False):
        self.ale = ALEInterface()

        self.ale.setInt(b"random_seed", 2 * random_seed)
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = rom_addr
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)
        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historcal frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width),
                     dtype=np.uint8), self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):

        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """

        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(4):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """

        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
Esempio n. 49
0
class AtariEmulator:

	def __init__(self, args):
		''' Initialize Atari environment '''

		# Parameters
		self.buffer_length = args.buffer_length
		self.screen_dims = args.screen_dims
		self.frame_skip = args.frame_skip
		self.blend_method = args.blend_method
		self.reward_processing = args.reward_processing
		self.max_start_wait = args.max_start_wait
		self.history_length = args.history_length
		self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip)

		#Initialize ALE instance
		self.ale = ALEInterface()
		self.ale.setFloat(b'repeat_action_probability', 0.0)
		if args.watch:
			self.ale.setBool(b'sound', True)
			self.ale.setBool(b'display_screen', True)
		self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

		self.buffer = np.empty((self.buffer_length, 210, 160))
		self.current = 0
		self.action_set = self.ale.getMinimalActionSet()
		self.lives = self.ale.lives()

		self.reset()


	def get_possible_actions(self):
		''' Return list of possible actions for game '''
		return self.action_set

	def get_screen(self):
		''' Add screen to frame buffer '''
		self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
		self.current = (self.current + 1) % self.buffer_length


	def reset(self):
		self.ale.reset_game()
		self.lives = self.ale.lives()

		if self.max_start_wait < 0:
			print("ERROR: max start wait decreased beyond 0")
			sys.exit()
		elif self.max_start_wait <= self.start_frames_needed:
			wait = 0
		else:
			wait = random.randint(0, self.max_start_wait - self.start_frames_needed)
		for _ in range(wait):
			self.ale.act(self.action_set[0])

		# Fill frame buffer
		self.get_screen()
		for _ in range(self.buffer_length - 1):
			self.ale.act(self.action_set[0])
			self.get_screen()
		# get initial_states
		state = [(self.preprocess(), 0, 0, False)]
		for step in range(self.history_length - 1):
			state.append(self.run_step(0))

		# make sure agent hasn't died yet
		if self.isTerminal():
			print("Agent lost during start wait.  Decreasing max_start_wait by 1")
			self.max_start_wait -= 1
			return self.reset()

		return state


	def run_step(self, action):
		''' Apply action to game and return next screen and reward '''

		raw_reward = 0
		for step in range(self.frame_skip):
			raw_reward += self.ale.act(self.action_set[action])
			self.get_screen()

		reward = None
		if self.reward_processing == 'clip':
			reward = np.clip(raw_reward, -1, 1)
		else:
			reward = raw_reward

		terminal = self.isTerminal()
		self.lives = self.ale.lives()

		return (self.preprocess(), action, reward, terminal, raw_reward)



	def preprocess(self):
		''' Preprocess frame for agent '''

		img = None

		if self.blend_method == "max":
			img = np.amax(self.buffer, axis=0)

		return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR)

	def isTerminal(self):
		return (self.isGameOver() or (self.lives > self.ale.lives()))


	def isGameOver(self):
		return self.ale.game_over()
Esempio n. 50
0
class ALEEnvironment(Environment):
  def __init__(self, rom_file, args):
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height

    self.life_lost = False

  def numActions(self):
    return len(self.actions)

  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
        self.mode == 'test' or
        not self.life_lost or  # `reset` called in a middle of episode
        self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False

  def act(self, action):
    lives = self.ale.lives()
    reward = self.ale.act(self.actions[action])
    self.life_lost = (not lives == self.ale.lives())
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, (self.screen_width, self.screen_height))
    return resized

  def isTerminal(self):
    if self.mode == 'train':
      return self.ale.game_over() or self.life_lost
    return self.ale.game_over()
Esempio n. 51
0
class Environment:
	"""docstring for Environment"""

	BUFFER_LEN = 2
	EPISODE_FRAMES = 18000
	EPOCH_COUNT = 200
	EPOCH_STEPS = 250000
	EVAL_EPS = 0.001
	FRAMES_SKIP = 4
	FRAME_HEIGHT = 84
	FRAME_WIDTH = 84
	MAX_NO_OP = 30
	MAX_REWARD = 1
	
	def __init__(self, rom_name, rng, display_screen = False):
		self.api = ALEInterface()
		self.api.setInt('random_seed', rng.randint(333))
		self.api.setBool('display_screen', display_screen)
		self.api.setFloat('repeat_action_probability', 0.0)
		self.rom_name = rom_name
		self.display_screen = display_screen
		self.rng = rng
		self.repeat = Environment.FRAMES_SKIP
		self.buffer_len = Environment.BUFFER_LEN
		self.height = Environment.FRAME_HEIGHT
		self.width = Environment.FRAME_WIDTH
		self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
		self.merge_id = 0
		self.max_reward = Environment.MAX_REWARD
		self.eval_eps = Environment.EVAL_EPS
		self.log_dir = ''
		self.network_dir = ''

		self.api.loadROM('../rom/' + self.rom_name)
		self.minimal_actions = self.api.getMinimalActionSet()
		original_width, original_height = self.api.getScreenDims()
		self.merge_frame = np.zeros((self.buffer_len
								, original_height
								, original_width)
								, dtype = np.uint8)

	def get_action_count(self):
		return len(self.minimal_actions)

	def train(self, agent, store_freq, folder = None, start_epoch = 0):
		self._open_log_files(agent, folder)
		obs = np.zeros((self.height, self.width), dtype = np.uint8)
		epoch_count = Environment.EPOCH_COUNT

		for epoch in xrange(start_epoch, epoch_count):
			self.need_reset = True
			steps_left = Environment.EPOCH_STEPS

			print "\n" + "=" * 50
			print "Epoch #%d" % (epoch + 1)
			episode = 0
			train_start = time.time()
			while steps_left > 0:
				num_step, _ = self._run_episode(agent, steps_left, obs)
				steps_left -= num_step
				episode += 1
				if steps_left == 0 or episode % 10 == 0:
					print "Finished episode #%d, steps_left = %d" \
						% (episode, steps_left)
			train_end = time.time()

			valid_values = agent.get_validate_values()
			eval_values = self.evaluate(agent)
			test_end = time.time()

			train_time = train_end - train_start
			test_time = test_end - train_end
			step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
			print "\tFinished epoch #%d, episode trained = %d\n" \
				"\tValidate values = %.3f, evaluate reward = %.3f\n"\
				"\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
					% (epoch + 1, episode, valid_values, eval_values\
						, train_time, test_time, step_per_sec)

			self._update_log_files(agent, epoch + 1, episode
								, valid_values, eval_values
								, train_time, test_time
								, step_per_sec, store_freq)
			gc.collect()

	def evaluate(self, agent, episodes = 30, obs = None):
		print "\n***Start evaluating"
		if obs is None:
			obs = np.zeros((self.height, self.width), dtype = np.uint8)
		sum_reward = 0.0
		sum_step = 0.0
		for episode in xrange(episodes):
			self.need_reset = True
			step, reward = self._run_episode(agent, self.episode_steps, obs
											, self.eval_eps, evaluating = True)
			sum_reward += reward
			sum_step += step
			print "Finished episode %d, reward = %d, step = %d" \
					% (episode + 1, reward, step)
		self.need_reset = True
		print "Average reward per episode = %.4f" % (sum_reward / episodes)
		print "Average step per episode = %.4f" % (sum_step / episodes)
		return sum_reward / episodes

	def _prepare_game(self):
		if self.need_reset or self.api.game_over():
			self.api.reset_game()
			self.need_reset = False
			if Environment.MAX_NO_OP > 0:
				num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
							+ self.buffer_len
				for _ in xrange(num_no_op):
					self.api.act(0)

		for _ in xrange(self.buffer_len):
			self._update_buffer()

	def _run_episode(self, agent, steps_left, obs
					, eps = 0.0, evaluating = False):
		self._prepare_game()

		start_lives = self.api.lives()
		step_count = 0
		sum_reward = 0
		is_terminal = False
		while step_count < steps_left and not is_terminal:
			self._get_screen(obs)
			action_id, _ = agent.get_action(obs, eps, evaluating)
			
			reward = self._repeat_action(self.minimal_actions[action_id])
			reward_clip = reward
			if self.max_reward > 0:
				reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

			life_lost = not evaluating and self.api.lives() < start_lives
			is_terminal = self.api.game_over() or life_lost \
						or step_count + 1 >= steps_left

			agent.add_experience(obs, is_terminal, action_id, reward_clip
								, evaluating)
			sum_reward += reward
			step_count += 1
			
		return step_count, sum_reward

	def _update_buffer(self):
		self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
		self.merge_id = (self.merge_id + 1) % self.buffer_len

	def _repeat_action(self, action):
		reward = 0
		for i in xrange(self.repeat):
			reward += self.api.act(action)
			if i + self.buffer_len >= self.repeat:
				self._update_buffer()
		return reward

	def _get_screen(self, resized_frame):
		self._resize_frame(self.merge_frame.max(axis = 0), resized_frame)
				
	def _resize_frame(self, src_frame, dst_frame):
		cv2.resize(src = src_frame, dst = dst_frame,
					dsize = (self.width, self.height),
					interpolation = cv2.INTER_LINEAR)

	def _open_log_files(self, agent, folder):
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]


		if folder is not None:
			self.log_dir = folder
			self.network_dir = self.log_dir + '/network'
		else:
			self.log_dir = '../run_results/' + base_rom_name + time_str
			self.network_dir = self.log_dir + '/network'

		info_name = get_next_name(self.log_dir, 'info', 'txt')
		git_name = get_next_name(self.log_dir, 'git-diff', '')

		try:
			os.stat(self.log_dir)
		except OSError:
			os.makedirs(self.log_dir)

		try:
			os.stat(self.network_dir)
		except OSError:
			os.makedirs(self.network_dir)

		with open(os.path.join(self.log_dir, info_name), 'w') as f:
			f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse'
														, 'HEAD']))
			f.write('Run command: ')
			f.write(' '.join(pipes.quote(x) for x in sys.argv))
			f.write('\n\n')
			f.write(agent.get_info())
			write_info(f, Environment)
			write_info(f, agent.__class__)
			write_info(f, agent.network.__class__)

		# From https://github.com/spragunr/deep_q_rl/pull/49/files
		with open(os.path.join(self.log_dir, git_name), 'w') as f:
			f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

		if folder is not None:
			return

		with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
			f.write("epoch,episode_train,validate_values,evaluate_reward"\
				",train_time,test_time,steps_per_second\n")

		mem = psutil.virtual_memory()
		with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
			f.write("epoch,available,free,buffers,cached"\
					",available_readable,used_percent\n")
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(0, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

	def _update_log_files(self, agent, epoch, episode, valid_values
						, eval_values, train_time, test_time, step_per_sec
						, store_freq):
		print "Updating log files"
		with open(self.log_dir + '/results.csv', 'a') as f:
			f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
						(epoch, episode, valid_values, eval_values
						, train_time, test_time, step_per_sec))

		mem = psutil.virtual_memory()
		with open(self.log_dir + '/memory.csv', 'a') as f:
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(epoch, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

		agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

		if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
				(store_freq > 0 and (epoch % store_freq == 0)):
			agent.dump_exp(self.network_dir + '/exp.npz')

	def _setup_record(self, network_file):
		file_name, _ = os.path.splitext(os.path.basename(network_file))
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		img_dir = os.path.dirname(network_file) + '/images_' \
					+ file_name + time_str
		rom_name, _ = os.path.splitext(self.rom_name)
		out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
					+ file_name + time_str + '.mov'
		print out_name

		try:
			os.stat(img_dir)
		except OSError:
			os.makedirs(img_dir)

		self.api.setString('record_screen_dir', img_dir)
		self.api.loadROM('../rom/' + self.rom_name)
		return img_dir, out_name

	def record_run(self, agent, network_file, episode_id = 1):
		if episode_id > 1:
			self.evaluate(agent, episode_id - 1)
			system_state = self.api.cloneSystemState()

		img_dir, out_name = self._setup_record(network_file)

		if episode_id > 1:
			self.api.restoreSystemState(system_state)

		self.evaluate(agent, 1)
		script = \
				"""
					{
						ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					} || {
						avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					}
				""" % (img_dir, out_name, img_dir, out_name)
		os.system(script)
Esempio n. 52
0
class AtariGame(Task):
    ''' RL task based on Arcade Game.
    '''

    def __init__(self, rom_path, num_frames=4, live=False, skip_frame=0, mode='normal'):
        self.ale = ALEInterface()
        if live:
            USE_SDL = True
            if USE_SDL:
                if sys.platform == 'darwin':
                    import pygame
                    pygame.init()
                    self.ale.setBool('sound', False) # Sound doesn't work on OSX
                elif sys.platform.startswith('linux'):
                    self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.mode = mode
        self.live = live
        self.ale.loadROM(rom_path)
        self.num_frames = num_frames
        self.frames = []
        self.frame_id = 0
        self.cum_reward = 0
        self.skip_frame = skip_frame
        if mode == 'small':
            img = T.matrix('img')
            self.max_pool = theano.function([img], max_pool_2d(img, [4, 4]))
            self.img_shape = (16, 16)
        else:
            self.img_shape = (84, 84) # image shape according to DQN Nature paper.
        while len(self.frames) < 4:
            self.step(choice(self.valid_actions, 1)[0])
        self.reset()


    def copy(self):
        import dill as pickle
        return pickle.loads(pickle.dumps(self))


    def reset(self):
        self.ale.reset_game()
        self.frame_id = 0
        self.cum_reward = 0
        if self.skip_frame:
            for frame_i in range(self.skip_frame):
                self.step(choice(self.valid_actions, 1)[0])


    @property
    def _curr_frame(self):
        img = self.ale.getScreenRGB()
        img = rgb2yuv(img)[:, :, 0] # get Y channel, according to Nature paper.
        # print 'RAM', self.ale.getRAM()
        if self.mode == 'small':
            img = self.max_pool(img)
        img = imresize(img, self.img_shape, interp='bicubic')
        return  img


    @property
    def curr_state(self):
        '''
        return raw pixels.
        '''
        return np.array(self.frames, dtype=floatX) / floatX(255.) # normalize


    @property
    def state_shape(self):
        return self.curr_state.shape


    @property
    def num_actions(self):
        return len(self.valid_actions)


    @property
    def valid_actions(self):
        return self.ale.getLegalActionSet()


    def step(self, action):
        reward = self.ale.act(action)
        if len(self.frames) == self.num_frames:
            self.frames = self.frames[1:]
        self.frames.append(self._curr_frame)
        self.frame_id += 1
        #print 'frame_id', self.frame_id
        self.cum_reward += reward
        return reward # TODO: scale the gradient up.


    def is_end(self):
        if np.abs(self.cum_reward) > 0:
            return True
        return self.ale.game_over()


    def visualize(self, fig=1, fname=None, format='png'):
        import matplotlib.pyplot as plt
        fig = plt.figure(fig, figsize=(5,5))
        plt.clf()
        plt.axis('off')
        #res = plt.imshow(self.ale.getScreenRGB())
        res = plt.imshow(self._curr_frame, interpolation='none')
        if fname:
            plt.savefig(fname, format=format)
        else:
            plt.show()
        return res
Esempio n. 53
0
class GameManager(object):
    """This class takes care of the interactions between an agent and
    a game across episodes, as well as overall logging of performance.
    """

    def __init__(
        self,
        game_name,
        agent,
        results_dir,
        n_epochs=1,
        n_episodes=None,
        n_frames=None,
        remove_old_results_dir=False,
        use_minimal_action_set=True,
        min_time_between_frames=0,
    ):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames

        if (n_episodes is None and n_frames is None) or (n_episodes is not None and n_frames is not None):
            raise ValueError("Extacly one of n_episodes and n_frames " "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"),
        )

        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True,
        )

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()

        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)

        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)

        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen, self.get_screen_grayscale, self.get_screen_RGB, self.get_RAM)

        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)

        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()

        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is 
            defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        for epoch in xrange(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0

        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start
        frames = self.ale.getFrameNumber()

        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()

            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)

            total_reward += reward

            self.rest(time() - timestep_start)

        wall_time = time() - episode_start
        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(
            self.n_epoch, self.n_episode, total_reward, self.ale.getEpisodeFrameNumber(), "{:.2f}".format(wall_time)
        )

    def _stop_condition_met(self):
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it 
        will overwrite what is in the old object"""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. 
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours. 
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object"""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()

        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to 
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }
Esempio n. 54
0
class GameState(object):
  def __init__(self, rand_seed, display=False):
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', rand_seed)

    if display:
      self._setup_display()
    
    self.ale.loadROM(ROM)

    # height=210, width=160
    self.screen = np.empty((210, 160, 1), dtype=np.uint8)
    
    no_action = 0
    
    self.reward = self.ale.act(no_action)
    self.terminal = self.ale.game_over()

    # screenのshapeは、(210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # (210, 160)にreshape
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # height=110, width=84にリサイズ
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    
    x_t = resized_screen[18:102,:]
    x_t = x_t.astype(np.float32)
    x_t *= (1.0/255.0)
    self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)

    # 実際に利用するactionのみを集めておく
    self.real_actions = self.ale.getMinimalActionSet()
    
  def _setup_display(self):
    if sys.platform == 'darwin':
      import pygame
      pygame.init()
      self.ale.setBool('sound', False)
    elif sys.platform.startswith('linux'):
      self.ale.setBool('sound', True)
    self.ale.setBool('display_screen', True)
    
  def process(self, action):
    # 18種類のうちの実際に利用するactionに変換
    real_action = self.real_actions[action]
    self.reward = self.ale.act(real_action)
    #self.reward = self.ale.act(action)
    self.terminal = self.ale.game_over()
    
    # screenのshapeは、(210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # (210, 160)にreshape
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # height=210, width=160
    
    # height=110, width=84にリサイズ
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    x_t1 = resized_screen[18:102,:]
    x_t1 = np.reshape(x_t1, (84, 84, 1))
    x_t1 = x_t1.astype(np.float32)    
    x_t1 *= (1.0/255.0)
    
    self.s_t1 = np.append(x_t1, self.s_t[:,:,0:3], axis = 2)
    if self.terminal:
      self.ale.reset_game()

  def update(self):
    self.s_t = self.s_t1
Esempio n. 55
0
class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
Esempio n. 56
0
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, 
                 ale_options=[{"key": "random_seed", "value": 0}, 
                              {"key": "color_averaging", "value": True},
                              {"key": "repeat_action_probability", "value": 0.}]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [84 * [84 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break
            
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))


    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
Esempio n. 57
0
class AtariEnvironment:
    
    def __init__(self, args, outputDir):
        
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq
        
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state
    
    def getGameNumber(self):
        return self.gameNumber
    
    def getFrameNumber(self):
        return self.ale.getFrameNumber()
    
    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()
    
    def getEpisodeStepNumber(self):
        return self.episodeStepNumber
    
    def getStepNumber(self):
        return self.stepNumber
    
    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1
        
        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()
    
            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))


        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0 # environment steps vs ALE frames.  Will probably be 4*frame number
Esempio n. 58
0
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self,
                 rng,
                 rom="ale/breakout.bin",
                 frame_skip=4,
                 ale_options=[{
                     "key": "random_seed",
                     "value": 0
                 }, {
                     "key": "color_averaging",
                     "value": True
                 }, {
                     "key": "repeat_action_probability",
                     "value": 0.
                 }]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
Esempio n. 59
0
            sess.run(Q_target.b_fc2.assign(Q_train.b_fc2))

        if frameCount % saveFrame is 0:
            if saveModel is True:
                saver.save(sess, modelPath)
            if saveData is True:
                memory.save()



    if explorationRate > 0.1:
        explorationRate -= explorationRateDelta



    if ale.game_over():
        cost_average /= (1.0 * (frameCount - frameCountLast)/trainFreq)
        Q_average /= (1.0 * batchSize*n_actions*(frameCount - frameCountLast)/trainFreq)
        episode += 1
        print 'Epi: %07d Score: %03d Exp: %.2f Frame: %08d Cost: %.6f FPS:%.2f Q: %.2f' \
              % (episode, scoreEpisode, explorationRate, frameCount, cost_average,
                 (frameCount - frameCountLast) / (time.time() - t0) ,Q_average      )

        # print t1s, t2s# t3s, t4s, t5s,t6s,t7s

        t0 = time.time()
        ale.reset_game()
        scoreEpisode = 0.0
        cost_average = 0.0
        Q_average = 0.0
        frameCountLast = frameCount