Python ALEInterface.getMinimalActionSetの例、ale_python_interface.ALEInterface.getMinimalActionSet Pythonの例

コード例 #1

0

ファイルを表示

ファイル: emulator.py プロジェクト: amharc/jnp3

class Emulator(object):
    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        self.ale = ALEInterface()
        self.max_num_frames_per_episode = 100000 #self.ale.getInt('max_num_frames_per_episode')
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        return self.ale.act(action)

    def terminal(self):
        return self.ale.game_over()

コード例 #2

0

ファイルを表示

ファイル: emulateStuff.py プロジェクト: pavitrakumar78/Playing-custom-games-using-Deep-Learning

class Emulate:
  def __init__(self, rom_file, display_screen=False,frame_skip=4,screen_height=84,screen_width=84,repeat_action_probability=0,color_averaging=True,random_seed=0,record_screen_path='screen_pics',record_sound_filename=None,minimal_action_set=True):
    self.ale = ALEInterface()
    if display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', frame_skip)
    self.ale.setFloat('repeat_action_probability', repeat_action_probability)
    self.ale.setBool('color_averaging', color_averaging)

    if random_seed:
      self.ale.setInt('random_seed', random_seed)

    self.ale.loadROM(rom_file)

    if minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
    else:
      self.actions = self.ale.getLegalActionSet()

    self.dims = (screen_width,screen_height)

  def numActions(self):
    return len(self.actions)

  def getActions(self):
  	return self.actions

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def getScreenGray(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def getScreenColor(self):
    screen = self.ale.getScreenRGB()
    resized = cv2.resize(screen, self.dims)
    rotated = np.rot90(resized,k=1)
    return rotated

  def isTerminal(self):
    return self.ale.game_over()

コード例 #3

0

ファイルを表示

ファイル: emulator.py プロジェクト: amharc/jnp3

class Emulator(object):
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']
        
    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.height, self.width),
                interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        return self.ale.act(self.actions[action])

    def terminal(self):
        return self.ale.game_over()

コード例 #4

0

ファイルを表示

ファイル: pyrlcade_environment.py プロジェクト: chenbokaix250/pyrlcade

class pyrlcade_environment(object):
    def init(self,rom_file,ale_frame_skip):

        self.ale = ALEInterface()

        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
        self.ale.set("random_seed",123)
        self.ale.set("disable_color_averaging",1)
        self.ale.set("frame_skip",ale_frame_skip)

        self.ale.loadROM(rom_file)
        self.legal_actions = self.ale.getMinimalActionSet()
        ram_size = self.ale.getRAMSize()
        self.ram = np.zeros((ram_size),dtype=np.uint8)
        self.ale.getRAM(self.ram)

        self.state = self.ale.getRAM(self.ram)

    def reset_state(self):
        self.ale.reset_game()

    def set_action(self,a):
        self.action = a

    def step(self):
        self.reward = self.ale.act(self.action)
        is_terminal = self.ale.game_over()
        return is_terminal

    def get_state(self):
        self.ale.getRAM(self.ram)
        return self.ram

    def get_reward(self):
        return self.reward

コード例 #5

0

ファイルを表示

ファイル: launcher.py プロジェクト: mowayao/deep-reinforcement-learning

def launch():
	logging.basicConfig(level=logging.INFO)
	myArgs = getParameters()
	rom = myArgs.game
	full_rom_path = os.path.join(myArgs.base_rom_path,rom)
	rng = np.random.RandomState()
	ale = ALEInterface()
	ale.setInt('random_seed',38)
	ale.setBool('display_screen',myArgs.display_screen)
	ale.setInt('frame_skip',myArgs.frame_skip)
	ale.setFloat('repeat_action_probability',myArgs.repeat_action_probability)

	ale.loadROM(full_rom_path)
	valid_actions = ale.getMinimalActionSet()
	'''for episode in xrange(10):
		total_reward = 0
		while not ale.game_over():
			from random import randrange
			a = valid_actions[randrange(len(valid_actions))]
			ale.act(a)
			#print reward
			#print ale.getScreenRGB()

			#total_reward += reward
			#print 'Episode', episode, 'ended with score:', total_reward
		ale.reset_game()
	'''
	memory_pool = ReplayMemory(myArgs.memory_size,rng)
	network_model = buildNetwork(myArgs.resized_height,myArgs.resized_width,myArgs.rmsp_epsilon,myArgs.rmsp_rho,myArgs.learning_rate,len(valid_actions))
	ddqn = DDQN(network_model,valid_actions,myArgs.target_nn_update_frequency,myArgs.discount,myArgs.phi_len)
	agent = Agent(myArgs,ddqn,memory_pool,valid_actions,rng)
	train_agent = TrainMyAgent(myArgs,ale,agent,valid_actions,rng)
	train_agent.run()

コード例 #6

0

ファイルを表示

ファイル: environment.py プロジェクト: BwRy/simple_dqn

class Environment:
  def __init__(self, rom_file, args):
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.dims = (args.screen_height, args.screen_width)

  def numActions(self):
    return len(self.actions)

  def restart(self):
    self.ale.reset_game()

  def act(self, action):
    reward = self.ale.act(self.actions[action])
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, self.dims)
    return resized

  def isTerminal(self):
    return self.ale.game_over()

コード例 #7

0

ファイルを表示

ファイル: emulator.py プロジェクト: hercky/a3c

class Emulator:
    def __init__(self):
    
        self.ale = ALEInterface()
        
        # turn off the sound
        self.ale.setBool('sound', False)
        
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)

        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)

        self.ale.setInt('random_seed', RANDOM_SEED)

        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)


        self.ale.loadROM(ROM_PATH)

        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))

        self.dims = DIMS
        #self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()
        # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        #resized /= COLOR_SCALE

        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)

        return self.ale.game_over()

コード例 #8

0

ファイルを表示

ファイル: atari.py プロジェクト: TungTNguyen/deeprlhw2

class AtariMDP(MDP, Serializable):

    def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4):
        Serializable.__init__(self, rom_path, obs_type, frame_skip)
        self.options = (rom_path, obs_type, frame_skip)
        
        self.ale = ALEInterface()
        self.ale.loadROM(rom_path)        
        self._rom_path = rom_path
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.frame_skip = frame_skip


    def get_image(self):
        return to_rgb(self.ale)
    def get_ram(self):
        return to_ram(self.ale)
    def game_over(self):
        return self.ale.game_over()
    def reset_game(self):
        return self.ale.reset_game()

    @property
    def n_actions(self):
        return len(self.action_set)

    def get_obs(self):
        if self._obs_type == OBS_RAM:
            return self.get_ram()[None,:]
        else:
            assert self._obs_type == OBS_IMAGE
            return self.get_image()[None,:,:,:]

    def step(self, a):

        reward = 0.0
        action = self.action_set[a]
        for _ in xrange(self.frame_skip):
            reward += self.ale.act(action)
        ob = self.get_obs().reshape(1,-1)
        return ob, np.array([reward]), self.ale.game_over()

    # return: (states, observations)
    def reset(self):
        self.ale.reset_game()
        return self.get_obs()

    @property
    def action_set(self):
        return self._action_set

    def plot(self):
        import cv2
        cv2.imshow("atarigame",self.get_image()) #pylint: disable=E1101
        cv2.waitKey(10) #pylint: disable=E1101

コード例 #9

0

ファイルを表示

ファイル: options.py プロジェクト: Itsukara/async_deep_reinforce

def peekActionSize(rom):
  if args.use_gym:
    import gym
    env = gym.make(args.gym_env)
    return env.action_space.n
  else:
    from ale_python_interface import ALEInterface
    ale = ALEInterface()
    ale.loadROM(rom.encode('ascii'))
    return len(ale.getMinimalActionSet())

コード例 #10

0

ファイルを表示

ファイル: ale_interface.py プロジェクト: vyraun/ALE_dqn

class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #
        self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()


    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()

コード例 #11

0

ファイルを表示

ファイル: emulator.py プロジェクト: ionelhosu/atari-human-checkpoint-replay

class emulator:
	def __init__(self, rom_name, vis,windowname='preview'):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		self.windowname = windowname
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		self.init_frame_number = 0

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow(self.windowname)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		# Instead of resetting the game, we load a checkpoint and start from there.
		# self.ale.reset_game()
		self.ale.restoreState(self.ale.decodeState(checkpoints[random.randint(0,99)].astype('uint8')))
		self.init_frame_number = self.ale.getFrameNumber()
		#self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow(self.windowname,nextstate)
		return nextstate, reward, self.ale.game_over()

	def get_frame_number(self):
		return self.ale.getFrameNumber() - self.init_frame_number

コード例 #12

0

ファイルを表示

ファイル: ale.py プロジェクト: carpedm20/async-rl

    def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
                 frame_skip=4, treat_life_lost_as_terminal=True,
                 crop_or_scale='scale', max_start_nullops=30,
                 record_screen_dir=None):
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        ale = ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ALE's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)
        ale.loadROM(str.encode(rom_filename))

        assert ale.getFrameNumber() == 0


        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()

コード例 #13

0

ファイルを表示

ファイル: breakout.py プロジェクト: tarvaina/ale-breakout-dqn

class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in xrange(self.steps_between_actions))

        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        return self.ale.game_over() or self.ale.lives() < 5

コード例 #14

0

ファイルを表示

ファイル: emulator.py プロジェクト: technologiclee/deepQN_tensorflow

class emulator:
	def __init__(self, rom_name, vis):
		if vis:
			import cv2
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode");
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('roms/' + rom_name )
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i

		# print(self.legal_actions)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
		self.vis = vis
		if vis: 
			cv2.startWindowThread()
			cv2.namedWindow("preview")

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action_indx):
		reward = self.ale.act(action_indx)	
		nextstate = self.get_image()
		# scipy.misc.imsave('test.png',nextstate)
		if self.vis:
			cv2.imshow('preview',nextstate)
		return nextstate, reward, self.ale.game_over()

コード例 #15

0

ファイルを表示

ファイル: game.py プロジェクト: mKaloer/ArcadeReinforcementLearning

class Game():
    """
    Wrapper around the ALEInterface class.
    """

    def __init__(self, rom_file, sdl=False):
        self.ale = ALEInterface()
        # Setup SDL
        if sdl:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool(b'sound', False) # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)

        # Load rom
        self.ale.loadROM(str.encode(rom_file))

    def get_action_set(self):
        return self.ale.getLegalActionSet()

    def get_minimal_action_set(self):
        return self.ale.getMinimalActionSet()

    def game_over(self):
        return self.ale.game_over()

    def act(self, action):
        return self.ale.act(action)

    def reset_game(self):
        self.ale.reset_game()

    def get_frame(self):
        return self.ale.getScreenRGB()

コード例 #16

0

ファイルを表示

ファイル: Atari.py プロジェクト: loliverhennigh/Compressing-Dynamical-Systems-Atari

class Atari:
	def __init__(self,rom_name):
		self.ale = ALEInterface()
		self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
		self.ale.setInt("random_seed",123)
		self.ale.setInt("frame_skip",4)
		self.ale.loadROM('./' +rom_name)
		self.screen_width,self.screen_height = self.ale.getScreenDims()
		self.legal_actions = self.ale.getMinimalActionSet()
		self.action_map = dict()
		for i in range(len(self.legal_actions)):
			self.action_map[self.legal_actions[i]] = i
		#print len(self.legal_actions)
		self.windowname = rom_name
		#cv2.startWindowThread()
		#cv2.namedWindow(rom_name)

	def get_image(self):
		numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
		self.ale.getScreenRGB(numpy_surface)
		image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
		return image

	def newGame(self):
		self.ale.reset_game()
		return self.get_image()

	def next(self, action):
		reward = self.ale.act(self.legal_actions[np.argmax(action)])	
		nextstate = self.get_image()
		
		#cv2.imshow(self.windowname,nextstate)
		if self.ale.game_over():
			self.newGame()
		#print "reward %d" % reward 
		return nextstate, reward, self.ale.game_over()

コード例 #17

0

ファイルを表示

class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historcal frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), 
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        
        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84), 
            interpolation=cv2.INTER_LINEAR)
        
        img = img.astype(np.float32)
        img *= (1.0/255.0)
        
        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))
        
        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, 
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed, 
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed) #get_reshaped_state()      

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal
    
    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])
            
    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()

コード例 #18

0

ファイルを表示

ファイル: atari.py プロジェクト: BarzinM/python-tools

class AtariEmulator:
    def __init__(self, dims, history_length):
        ''' Initialize Atari environment '''

        # Parameters
        self.buffer_length = 2  # args.buffer_length
        self.screen_dims = dims
        self.frame_skip = 4  # args.frame_skip
        self.max_start_wait = 30  # args.max_start_wait
        self.history_length = history_length  # args.history_length
        self.start_frames_needed = self.buffer_length - 1 + \
            ((self.history_length - 1) * self.frame_skip)

        # Initialize ALE instance
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        # if args.watch:
        #     self.ale.setBool(b'sound', True)
        #     self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode('../roms/pong.bin'))

        self.buffer = np.empty((self.buffer_length, 210, 160))
        self.current = 0
        self.action_set = self.ale.getMinimalActionSet()
        self.lives = self.ale.lives()

        self.reset()

    def get_possible_actions(self):
        ''' Return list of possible actions for game '''
        return self.action_set

    def get_screen(self):
        ''' Add screen to frame buffer '''
        self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
        self.current = (self.current + 1) % self.buffer_length

    def reset(self):
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if self.max_start_wait < 0:
            print("ERROR: max start wait decreased beyond 0")
            sys.exit()
        elif self.max_start_wait <= self.start_frames_needed:
            wait = 0
        else:
            wait = random.randint(
                0, self.max_start_wait - self.start_frames_needed)
        for _ in range(wait):
            self.ale.act(self.action_set[0])

        # Fill frame buffer
        for _ in range(self.buffer_length - 1):
            self.ale.act(self.action_set[0])
            self.get_screen()
        # get initial_states
        frame = self.preprocess()
        state = [(frame, 0, 0, False)]
        for step in range(self.history_length - 1):
            next_frame, reward, terminal, _ = self.run_step(0)
            state.append((frame, 0, reward, terminal))
            frame = next_frame

        # make sure agent hasn't died yet
        if self.isTerminal():
            print(
                "Agent lost during start wait.  Decreasing max_start_wait by 1"
            )
            self.max_start_wait -= 1
            return self.reset()

        return state, next_frame

    def run_step(self, action):
        ''' Apply action to game and return next screen and reward '''

        raw_reward = 0
        for step in range(self.frame_skip):
            raw_reward += self.ale.act(self.action_set[action])
            self.get_screen()

        reward = np.clip(raw_reward, -1, 1)

        terminal = self.isTerminal()
        next_frame = self.preprocess()

        return (next_frame, reward, terminal, raw_reward)

    def preprocess(self):
        ''' Preprocess frame for agent '''

        img = np.amax(self.buffer, axis=0)

        return cv2.resize(img,
                          self.screen_dims,
                          interpolation=cv2.INTER_LINEAR)

    def isTerminal(self):
        t = self.ale.game_over() or (self.lives > self.ale.lives())
        if t:
            self.lives = self.ale.lives()
        return t

コード例 #19

0

ファイルを表示

ファイル: atari.py プロジェクト: yinglanma/AI-project

class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    Will automatically restart when a real episode ends (isOver might be just
    lost of lives but not game over).
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 height_range=(None, None),
                 frame_skip=4,
                 image_shape=(84, 84),
                 nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            if execute_only_once():
                logger.warn(
                    "https://github.com/mgbellemare/Arcade-Learning-Environment/pull/171 is not merged!"
                )

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype(
            'float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def finish_episode(self):
        self.stats['score'].append(self.current_episode_score.sum)

    def restart_episode(self):
        self.current_episode_score.reset()
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        if isOver:
            self.finish_episode()
        if self.ale.game_over():
            self.restart_episode()
        return (r, isOver)

コード例 #20

0

ファイルを表示

ファイル: environment.py プロジェクト: mthrok/simple_dqn

class ALEEnvironment(Environment):
  def __init__(self, rom_file, args):
    from ale_python_interface import ALEInterface
    self.ale = ALEInterface()
    if args.display_screen:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        self.ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        self.ale.setBool('sound', True)
      self.ale.setBool('display_screen', True)

    self.ale.setInt('frame_skip', args.frame_skip)
    self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
    self.ale.setBool('color_averaging', args.color_averaging)

    if args.random_seed:
      self.ale.setInt('random_seed', args.random_seed)

    if args.record_screen_path:
      if not os.path.exists(args.record_screen_path):
        logger.info("Creating folder %s" % args.record_screen_path)
        os.makedirs(args.record_screen_path)
      logger.info("Recording screens to %s", args.record_screen_path)
      self.ale.setString('record_screen_dir', args.record_screen_path)

    if args.record_sound_filename:
      logger.info("Recording sound to %s", args.record_sound_filename)
      self.ale.setBool('sound', True)
      self.ale.setString('record_sound_filename', args.record_sound_filename)

    self.ale.loadROM(rom_file)

    if args.minimal_action_set:
      self.actions = self.ale.getMinimalActionSet()
      logger.info("Using minimal action set with size %d" % len(self.actions))
    else:
      self.actions = self.ale.getLegalActionSet()
      logger.info("Using full action set with size %d" % len(self.actions))
    logger.debug("Actions: " + str(self.actions))

    self.screen_width = args.screen_width
    self.screen_height = args.screen_height

    self.life_lost = False

  def numActions(self):
    return len(self.actions)

  def restart(self):
    # In test mode, the game is simply initialized. In train mode, if the game
    # is in terminal state due to a life loss but not yet game over, then only
    # life loss flag is reset so that the next game starts from the current
    # state. Otherwise, the game is simply initialized.
    if (
        self.mode == 'test' or
        not self.life_lost or  # `reset` called in a middle of episode
        self.ale.game_over()  # all lives are lost
    ):
      self.ale.reset_game()
    self.life_lost = False

  def act(self, action):
    lives = self.ale.lives()
    reward = self.ale.act(self.actions[action])
    self.life_lost = (not lives == self.ale.lives())
    return reward

  def getScreen(self):
    screen = self.ale.getScreenGrayscale()
    resized = cv2.resize(screen, (self.screen_width, self.screen_height))
    return resized

  def isTerminal(self):
    if self.mode == 'train':
      return self.ale.game_over() or self.life_lost
    return self.ale.game_over()

コード例 #21

0

ファイルを表示

class AleEnvironment(Environment):
  def __init__(self, rom_name, record_display=False, show_display=False, id = 0, shrink=False, life_lost_as_end=True, use_grayscale=True):
    super(AleEnvironment, self).__init__()
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', int(np.random.rand() * 100))
    self.ale.setFloat('repeat_action_probability', 0.0)
    self.ale.setBool('color_averaging', False)
    self.record_display = record_display
    self.show_display = show_display

    if self.record_display:
      self.ale.setString('record_screen_dir', 'movie')
    elif self.show_display:
      self.display_name = rom_name + '_' + str(id)
      cv2.startWindowThread()
      cv2.namedWindow(self.display_name)

    self.ale.loadROM(rom_name)
    self.actions = self.ale.getMinimalActionSet()
    self.screen_width, self.screen_height = self.ale.getScreenDims()
    self.use_grayscale = use_grayscale
    if self.use_grayscale:
      self.screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8)
    else:
      self.screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
      self.prev_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
    self.shrink = shrink
    self.life_lost_as_end = life_lost_as_end
    self.lives_lost = False
    self.lives = self.ale.lives()


  def __enter__(self):
    return self


  def __exit__(self, exc_type, exc_value, traceback):
    cv2.destroyWindow(self.display_name)


  def act(self, action):
    reward = self.ale.act(self.actions[action])
    if self.use_grayscale:
      screen = self.ale.getScreenGrayscale(self.screen)
    else:
      current_screen = self.ale.getScreenRGB(self.screen)
      screen = np.maximum(current_screen, self.prev_screen)
      self.prev_screen = current_screen
      screen = screen[:, :, 0] * 0.2126 + screen[:, :, 1] * 0.0722 + screen[:, :, 2] * 0.7152
      screen = screen.astype(np.uint8)
    screen = np.reshape(screen, (self.screen_height, self.screen_width, 1))
    state = self.preprocess(screen)
    self.lives_lost = True if self.lives > self.ale.lives() else False
    self.lives = self.ale.lives()
    return reward, state


  def is_end_state(self):
    if self.life_lost_as_end:
      return self.ale.game_over() or self.lives_lost
    else:
      return self.ale.game_over()


  def reset(self):
    if self.ale.game_over():
      self.ale.reset_game()
    self.lives = self.ale.lives()
    self.lives_lost = False


  def available_actions(self):
    # return available indexes instead of actual action value
    return range(0, len(self.actions))


  def preprocess(self, screen):
    if self.show_display and not self.record_display:
      cv2.imshow(self.display_name, screen)

    if self.shrink:
      resized = cv2.resize(screen, (84, 84))
    else:
      resized = cv2.resize(screen, (84, 110))
      resized = resized[18:102, :]

    scaled = resized.astype(np.float32) / 255.0
    return scaled

コード例 #22

0

ファイルを表示

ファイル: game_state.py プロジェクト: amoliu/async_deep_reinforce

class GameState(object):
  def __init__(self, rand_seed, display=False):
    self.ale = ALEInterface()
    self.ale.setInt('random_seed', rand_seed)

    if display:
      self._setup_display()
    
    self.ale.loadROM(ROM)

    # height=210, width=160
    self.screen = np.empty((210, 160, 1), dtype=np.uint8)
    
    no_action = 0
    
    self.reward = self.ale.act(no_action)
    self.terminal = self.ale.game_over()

    # screenのshapeは、(210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # (210, 160)にreshape
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # height=110, width=84にリサイズ
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    
    x_t = resized_screen[18:102,:]
    x_t = x_t.astype(np.float32)
    x_t *= (1.0/255.0)
    self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)

    # 実際に利用するactionのみを集めておく
    self.real_actions = self.ale.getMinimalActionSet()
    
  def _setup_display(self):
    if sys.platform == 'darwin':
      import pygame
      pygame.init()
      self.ale.setBool('sound', False)
    elif sys.platform.startswith('linux'):
      self.ale.setBool('sound', True)
    self.ale.setBool('display_screen', True)
    
  def process(self, action):
    # 18種類のうちの実際に利用するactionに変換
    real_action = self.real_actions[action]
    self.reward = self.ale.act(real_action)
    #self.reward = self.ale.act(action)
    self.terminal = self.ale.game_over()
    
    # screenのshapeは、(210, 160, 1)
    self.ale.getScreenGrayscale(self.screen)
    
    # (210, 160)にreshape
    reshaped_screen = np.reshape(self.screen, (210, 160))
    
    # height=210, width=160
    
    # height=110, width=84にリサイズ
    resized_screen = cv2.resize(reshaped_screen, (84, 110))
    x_t1 = resized_screen[18:102,:]
    x_t1 = np.reshape(x_t1, (84, 84, 1))
    x_t1 = x_t1.astype(np.float32)    
    x_t1 *= (1.0/255.0)
    
    self.s_t1 = np.append(x_t1, self.s_t[:,:,0:3], axis = 2)
    if self.terminal:
      self.ale.reset_game()

  def update(self):
    self.s_t = self.s_t1

コード例 #23

0

ファイルを表示

class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self,
                 rng,
                 rom="ale/breakout.bin",
                 frame_skip=4,
                 ale_options=[{
                     "key": "random_seed",
                     "value": 0
                 }, {
                     "key": "color_averaging",
                     "value": True
                 }, {
                     "key": "repeat_action_probability",
                     "value": 0.
                 }]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float.".format(
                        option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break

        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84),
                   self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)

        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))

    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()

コード例 #24

0

ファイルを表示

ファイル: environment_train.py プロジェクト: gom7745/DQN-tensorflow

class Environment(object):
    def __init__(self, config):
        #self.env = gym.make(config.env_name)
        self.env = ALEInterface()
        self.env.setInt(b'random_seed', 123)
        self.env.loadROM('./breakout.bin')

        self.actionSet = self.env.getMinimalActionSet()

        screen_width, screen_height, self.action_repeat, self.random_start = \
            config.screen_width, config.screen_height, config.action_repeat, config.random_start

        self.display = config.display
        self.dims = (screen_width, screen_height)

        self._screen = None
        self.reward = 0
        self.terminal = True

    def new_game(self, from_random_game=False):
        if self.lives == 0:
            # self._screen = self.env.reset()
            self.env.reset_game()  #self.env.reset()
            self._screen = self.env.getScreenRGB()
        self._step(0)
        self.render()
        return self.screen, 0, 0, self.terminal

    def new_random_game(self):
        self.new_game(True)
        for _ in xrange(random.randint(0, self.random_start - 1)):
            self._step(0)
        self.render()
        return self.screen, 0, 0, self.terminal

    def _step(self, action):
        # self._screen, self.reward, self.terminal, _ = self.env.step(action)
        self.reward = self.env.act(self.actionSet[action])
        self._screen = self.env.getScreenRGB()
        self.terminal = self.env.game_over()

    def _random_step(self):
        action = random.randint(0, self.action_size -
                                1)  #self.env.action_space.sample()
        self._step(action)

    @property
    def screen(self):
        return cv2.resize(
            cv2.cvtColor(self._screen, cv2.COLOR_RGB2GRAY) / 255., self.dims)
        #return cv2.resize(cv2.cvtColor(self._screen, cv2.COLOR_BGR2YCR_CB)/255., self.dims)[:,:,0]

    @property
    def action_size(self):
        return len(self.actionSet)  #self.env.action_space.n

    @property
    def lives(self):
        return self.env.lives()  #self.env.ale.lives()

    @property
    def state(self):
        return self.screen, self.reward, self.terminal

    def render(self):
        if self.display:
            1  #self.env.render()

    def after_act(self, action):
        self.render()

コード例 #25

0

ファイルを表示

class GameState2(object):
    def __init__(self, rand_seed, display=False, no_op_max=7):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', rand_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self._no_op_max = no_op_max

        if display:
            self._setup_display()

        self.ale.loadROM(ROM2.encode('ascii'))

        # collect minimal action set
        self.real_actions = self.ale.getMinimalActionSet()

        # height=210, width=160
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)

        self.reset()

    def _process_frame(self, action, reshape):
        reward = self.ale.act(action)
        terminal = self.ale.game_over()

        # screen shape is (210, 160, 1)
        self.ale.getScreenGrayscale(self._screen)

        # reshape it into (210, 160)
        reshaped_screen = np.reshape(self._screen, (210, 160))

        # resize to height=110, width=84
        resized_screen = cv2.resize(reshaped_screen, (84, 110))

        x_t = resized_screen[18:102, :]
        if reshape:
            x_t = np.reshape(x_t, (84, 84, 1))
        x_t = x_t.astype(np.float32)
        x_t *= (1.0 / 255.0)
        return reward, terminal, x_t

    def _setup_display(self):
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool(b'sound', False)
        elif sys.platform.startswith('linux'):
            self.ale.setBool(b'sound', True)
        self.ale.setBool(b'display_screen', True)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        _, _, x_t = self._process_frame(0, False)

        self.reward = 0
        self.terminal = False
        self.s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    def process(self, action):
        # convert original 18 action index to minimal action set index
        real_action = self.real_actions[action]

        r, t, x_t1 = self._process_frame(real_action, True)

        self.reward = r
        self.terminal = t
        self.s_t1 = np.append(self.s_t[:, :, 1:], x_t1, axis=2)

    def update(self):
        self.s_t = self.s_t1

コード例 #26

0

ファイルを表示

ファイル: atari.py プロジェクト: Paseam/tensorpack

class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    NOTE: will automatically restart when a real episode ends
    """
    def __init__(self, rom_file, viz=0, height_range=(None,None),
            frame_skip=4, image_shape=(84, 84), nullop_start=30,
            live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with random number of null ops
        :param live_losts_as_eoe: consider lost of lives as end of episode.  useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
                "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)

            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()


        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                #m = cv2.resize(ret, (1920,1200))
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)

コード例 #27

0

ファイルを表示

class AtariEnvironment:
    """
	Environment for playing Atari games using ALE Interface
	"""
    def __init__(self, game_filename, **kwargs):
        """
		Create an environment with the provided game
		"""

        pygame.init()

        self.screen = pygame.display.set_mode((160, 210))
        self.fps_clock = pygame.time.Clock()

        self.show_while_training = True

        # Buffer for grabbing the screen from ALE
        self.screen_buffer = np.zeros((100800, ), np.uint8)

        # Create the ALE interface and load the game
        self.ale = ALEInterface()
        self.ale.setBool('color_averaging', True)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.loadROM(game_filename)

        # Grab the set of available moves for this game
        self.move_list = self.ale.getMinimalActionSet()

        self.listeners = []

    def update_screen(self):
        """
		Grab the current screen from ALE and display it via pygame
		"""

        self.ale.getScreenRGB(self.screen_buffer)

#		if self.show_while_training:
#			game_screen = self.screen_buffer.reshape((210,160,3))
#			game_screen = np.transpose(game_screen, (1,0,2))
#
#			game_surface = pygame.surfarray.make_surface(game_screen)
#			self.screen.blit(game_surface, (0,0))

#			pygame.display.flip()

    def get_reduced_screen(self):
        """
		Convert current screen to 84x84 np array of luminescence values.  Scale values
		from 0.0 to 1.0 to work with Tensorflow
		"""

        # Reshape the screen buffer to an appropriate shape
        game_screen = self.screen_buffer.reshape((210, 160, 3))

        # Convert to luminosity
        gray_screen = np.dot(game_screen, np.array([0.299, 0.587,
                                                    0.114])).astype(np.uint8)
        gray_screen = ndimage.zoom(gray_screen, (0.4, 0.525))

        return gray_screen

    def act(self, action):
        """
		Perform an action on the environment
		"""

        ale_action = self.move_list[action]

        return self.ale.act(ale_action)

    def terminal(self):
        """
		Return if the state is a terminal state
		"""

        return self.ale.game_over()

    def lives(self):
        """
		How many lives are left
		"""

        return self.ale.lives()

    def reset_game(self):
        """
		"""

        self.ale.reset_game()

コード例 #28

0

ファイルを表示

ファイル: AleAgent.py プロジェクト: johnygomez/AtariRL

class AleAgent:
    ##
    # @param processing_cls Class for processing game visual unput
    def __init__(self, processing_cls, game_rom=None, encoder_model=None, encoder_weights=None, NFQ_model=None, NFQ_weights=None):
        assert game_rom is not None
        self.game = ALEInterface()
        if encoder_weights is not None and encoder_model is not None:
            self.encoder = Encoder(path_to_model=encoder_model, path_to_weights=encoder_weights)
        else:
            self.encoder = Encoder()

        self.processor = processing_cls()

        # Get & Set the desired settings
        self.game.setInt('random_seed', 0)
        self.game.setInt('frame_skip', 4)

        # Set USE_SDL to true to display the screen. ALE must be compilied
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = True

        if USE_SDL:
            if sys.platform == 'darwin':
                pygame.init()
                self.game.setBool('sound', False)   # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.game.setBool('sound', False)   # no sound

            self.game.setBool('display_screen', True)

        # Load the ROM file
        self.game.loadROM(game_rom)

        # Get the list of legal actions
        self.legal_actions = self.game.getLegalActionSet()

        # Get actions applicable in current game
        self.minimal_actions = self.game.getMinimalActionSet()

        if NFQ_model is not None and NFQ_weights is not None:
            self.NFQ = NFQ(
                self.encoder.out_dim,
                len(self.minimal_actions),
                model_path=NFQ_model,
                weights_path=NFQ_weights
            )
        else:
            self.NFQ = NFQ(self.encoder.out_dim, len(self.minimal_actions))

        (self.screen_width, self.screen_height) = self.game.getScreenDims()
        self.screen_data = np.zeros(
            (self.screen_height, self.screen_width),
            dtype=np.uint8
        )

    ##
    # Initialize the reinforcement learning
    def train(self, num_of_episodes=1500, eps=0.995, key_binding=None):
        pygame.init()
        for episode in xrange(num_of_episodes):
            total_reward = 0
            moves = 0
            hits = 0
            print 'Starting episode: ', episode+1

            if key_binding:
                eps = 0.05
            else:
                eps -= 2/num_of_episodes

            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            next_state = self.encoder.encode(pooled_data)
            while not self.game.game_over():
                current_state = next_state
                x = None

                if key_binding:
                    key_pressed = pygame.key.get_pressed()
                    x = key_binding(key_pressed)

                if x is None:
                    r = np.random.rand()
                    if r < eps:
                        x = np.random.randint(self.minimal_actions.size)
                    else:
                        x = self.NFQ.predict_action(current_state)

                a = self.minimal_actions[x]
                # Apply an action and get the resulting reward
                reward = self.game.act(a)

                # record only every 3 frames
                # if not moves % 3:
                self.game.getScreenGrayscale(self.screen_data)
                pooled_data = self.processor.process(self.screen_data)
                next_state = self.encoder.encode(pooled_data)
                transition = np.append(current_state, x)
                transition = np.append(transition, next_state)
                transition = np.append(transition, reward)
                self.NFQ.add_transition(transition)

                total_reward += reward
                if reward > 0:
                    hits += 1

                moves += 1
                if eps > 0.1:
                    eps -= 0.00001
            # end while

            print 'Epsilon: ', eps
            print 'Episode', episode+1, 'ended with score:', total_reward
            print 'Hits: ', hits
            self.game.reset_game()
            self.NFQ.train()
            hits = 0
            moves = 0
            self.NFQ.save_net()
        # end for

    ##
    # Play the game!
    def play(self):
        total_reward = 0
        moves = 1
        while not self.game.game_over():
            self.game.getScreenGrayscale(self.screen_data)
            pooled_data = self.processor.process(self.screen_data)
            current_state = self.encoder.encode(pooled_data)

            x = self.NFQ.predict_action(current_state)
            a = self.minimal_actions[x]
            reward = self.game.act(a)
            total_reward += reward
            moves += 1

        print 'The game ended with score:', total_reward, ' after: ', moves, ' moves'

コード例 #29

0

ファイルを表示

ファイル: ale_env.py プロジェクト: only4hj/DeepRL

 def get_actions(self, rom=None):
     if self.actions is None and rom != None:
         ale = ALEInterface()
         ale.loadROM(rom)
         self.actions = ale.getMinimalActionSet()
     return self.actions

コード例 #30

0

ファイルを表示

class ALEEnvironment(BaseEnvironment):
    # 63 games
    ADVENTURE = "adventure"
    AIR_RAID = "air_raid"
    ALIEN = "alien"
    AMIDAR = "amidar"
    ASSAULT = "assault"
    ASTERIX = "asterix"
    ASTEROIDS = "asteroids"
    ATLANTIS = "aslantis"
    BANK_HEIST = "bank_heist"
    BATTLE_ZONE = "battle_zone"
    BEAM_RIDER = "beam_rider"
    BERZERK = "berzerk"
    BOWLING = "bowling"
    BOXING = "boxing"
    BREAKOUT = "breakout"
    CARNIVAL = "carnival"
    CENTIPEDE = "centipede"
    CHOPPER_COMMAND = "chopper_command"
    CRAZY_CLIMBER = "crazy_climber"
    DEFENDER = "defender"
    DEMON_ATTACK = "demon_attack"
    DOUBLE_DUNK = "double_dunk"
    ELEVATOR_ACTION = "elevator_action"
    ENDURO = "enduro"
    FISHING_DERBY = "fishing_derby"
    FREEWAY = "freeway"
    FROSTBITE = "frostbite"
    GOPHER = "gopher"
    GRAVITAR = "gravitar"
    HERO = "hero"
    ICE_HOCKEY = "ice_hockey"
    JAMESBOND = "jamesbond"
    JOURNEY_ESCAPE = "journey_escape"
    KABOOM = "kaboom"
    KANGAROO = "kangaroo"
    KRULL = "krull"
    KUNGFU_MASTER = "kung_fu_master"
    MONTEZUMA = "montezuma_revenge"
    MS_PACMAN = "ms_pacman"
    UNKNOWN = "name_this_game"
    PHOENIX = "phoenix"
    PITFALL = "pitfall"
    PONG = "pong"
    POOYAN = "pooyan"
    PRIVATE_EYE = "private_eye"
    QBERT = "qbert"
    RIVERRAID = "riverraid"
    ROAD_RUNNER = "road_runner"
    ROBOTANK = "robotank"
    SEAQUEST = "seaquest"
    SKIING = "skiing"
    SOLARIS = "solaris"
    SPACE_INVADERS = "space_invaders"
    STAR_GUNNER = "star_gunner"
    TENNIS = "tennis"
    TIME_PILOT = "time_pilot"
    TUTANKHAM = "tutankham"
    UP_N_DOWN = "up_n_down"
    VENTURE = "venture"
    VIDEO_PINBALL = "video_pinball"
    WIZARD_OF_WOR = "wizard_of_wor"
    YARS_REVENGE = "yars_revenge"
    ZAXXON = "zaxxon"

    def __init__(self,
                 rom_name,
                 frame_skip=4,
                 repeat_action_probability=0.,
                 max_episode_steps=10000,
                 loss_of_life_termination=False,
                 loss_of_life_negative_reward=False,
                 bitwise_max_on_two_consecutive_frames=False,
                 is_render=False,
                 seed=None,
                 startup_policy=None,
                 disable_actions=None,
                 num_of_sub_actions=-1,
                 state_processor=AtariProcessor(resize_shape=(84, 84),
                                                convert_to_grayscale=True)):

        os.environ['SDL_VIDEO_CENTERED'] = '1'

        file_exist = isfile(ALEEnvironment.get_rom_path(rom_name))
        if not file_exist:
            raise ValueError("Rom not found ! Please put rom " + rom_name +
                             ".bin into: " + ALEEnvironment.get_rom_path())

        self.__rom_name = rom_name
        self.__ale = ALEInterface()

        if frame_skip < 0:
            print("Invalid frame_skip param ! Set default frame_skip = 4")
            self.__frame_skip = 4
        else:
            self.__frame_skip = frame_skip

        if repeat_action_probability < 0 or repeat_action_probability > 1:
            raise ValueError("Invalid repeat_action_probability")
        else:
            self.__repeat_action_probability = repeat_action_probability

        self.__max_episode_steps = max_episode_steps
        self.__loss_of_life_termination = loss_of_life_termination
        self.__loss_of_life_negative_reward = loss_of_life_negative_reward
        self.__max_2_frames = bitwise_max_on_two_consecutive_frames

        # Max 2 frames only work with grayscale
        self.__grayscale = False
        if state_processor is not None and type(
                state_processor
        ) is AtariProcessor and state_processor.get_grayscale():
            self.__grayscale = True

        if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale:
            self.__max_2_frames = True
        else:
            self.__max_2_frames = False

        self.__is_render = is_render
        self.__processor = state_processor

        if seed is None or seed <= 0 or seed >= 9999:
            if seed is not None and (seed < 0 or seed >= 9999):
                print("Invalid seed ! Default seed = randint(0, 9999")
            self.__seed = np.random.randint(0, 9999)
            self.__random_seed = True
        else:
            self.__random_seed = False
            self.__seed = seed

        self.__current_steps = 0
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_lives = 0
        self.__action_reduction = num_of_sub_actions
        self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale(
        )
        self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3),
                                      dtype=np.uint8)
        self.__current_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_state = None
        self.__prev_state = None
        self.__startup_policy = startup_policy
        if disable_actions is None:
            self.__dis_act = []
        else:
            self.__dis_act = disable_actions

        if self.__processor.get_number_of_objectives() > 1:
            self.__multi_objs = True
        else:
            self.__multi_objs = False

    def get_processor(self):
        return self.__processor

    def __init_ale(self):

        self.__ale.setBool(b'display_screen', self.__is_render)

        if self.__max_2_frames and self.__frame_skip > 1:
            self.__ale.setInt(b'frame_skip', 1)
        else:
            self.__ale.setInt(b'frame_skip', self.__frame_skip)

        self.__ale.setInt(b'random_seed', self.__seed)
        self.__ale.setFloat(b'repeat_action_probability',
                            self.__repeat_action_probability)
        self.__ale.setBool(b'color_averaging', False)

        self.__ale.loadROM(
            ALEEnvironment.get_rom_path(self.__rom_name).encode())

        width, height = self.__ale.getScreenDims()
        return width, height, self.__ale.getMinimalActionSet()

    def clone(self):
        if self.__random_seed:
            seed = np.random.randint(0, 9999)
        else:
            seed = self.__seed

        return ALEEnvironment(self.__rom_name, self.__frame_skip,
                              self.__repeat_action_probability,
                              self.__max_episode_steps,
                              self.__loss_of_life_termination,
                              self.__loss_of_life_negative_reward,
                              self.__max_2_frames, self.__is_render, seed,
                              self.__startup_policy,
                              self.__dis_act, self.__action_reduction,
                              self.__processor.clone())

    def step_all(self, a):
        if isinstance(a, (list, np.ndarray)):
            if len(a) <= 0:
                raise ValueError('Empty action list !')
            a = a[0]
        self.__current_steps += 1
        act = self.__action_set[a]
        rew = self._step(act)
        next_state = self.get_state()
        _is_terminal = self.is_terminal()
        return next_state, rew, _is_terminal, self.__current_steps

    def reset(self):
        self.__ale.reset_game()
        self.__current_lives = self.__ale.lives()
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_state = None
        self.__prev_state = None

        action_space = self.get_action_space()
        v_range, is_range = action_space.get_range()
        if len(v_range) > 1:
            self.step(1)

        # No op steps
        if self.__startup_policy is not None:
            max_steps = int(self.__startup_policy.get_max_steps())
            for _ in range(max_steps):
                act = self.__startup_policy.step(self.get_state(),
                                                 action_space)
                self.step(act)

        # Start training from this point
        self.__current_steps = 0

        # Reset processor
        self.__processor.reset()

        return self.get_state()

    def _pre_step(self, act):
        if self.__max_2_frames and self.__frame_skip > 1:
            rew = 0
            for i in range(self.__frame_skip - 2):
                rew += self.__ale.act(act)
                self.__prev_buffer = self.__ale.getScreenRGB(
                    self.__prev_buffer)

            self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer)

            rew += self.__ale.act(act)

            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)

            self.__is_terminal = self.__ale.game_over()

            self.__prev_state = self.__processor.process(self.__prev_buffer)

            self.__current_state = self.__processor.process(
                self.__current_buffer)

            self.__current_state = np.maximum.reduce(
                [self.__prev_state, self.__current_state])
        else:
            rew = self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__current_state = self.__processor.process(
                    self.__current_buffer)

        if self.__multi_objs and self.__processor is not None:
            all_rewards = self.__processor.get_rewards(rew)
            return all_rewards
        else:
            return rew

    def _step(self, act):
        for i in range(len(self.__dis_act)):
            if act == self.__dis_act[i]:
                act = 0

        if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward:
            if not self.__is_terminal:
                next_lives = self.__ale.lives()
                if next_lives < self.__current_lives:
                    act = 1
                    self.__current_lives = next_lives
            return self._pre_step(act)
        else:
            rew = self._pre_step(act)
            next_lives = self.__ale.lives()
            if next_lives < self.__current_lives:
                if self.__loss_of_life_negative_reward:
                    rew -= 1
                self.__current_lives = next_lives
                self.__is_life_lost = True

            return rew

    def get_state(self):
        if not self.__max_2_frames:
            if self.__processor is not None:
                return self.__current_state
            else:
                return self.__current_buffer
        else:
            return self.__current_state

    def is_terminal(self):
        if self.__loss_of_life_termination and self.__is_life_lost:
            return True
        elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps:
            return True
        else:
            return self.__is_terminal

    @staticmethod
    def get_rom_path(rom=None):
        if rom is None:
            return os.path.dirname(os.path.abspath(__file__)) + "/roms/"
        else:
            return os.path.dirname(
                os.path.abspath(__file__)) + "/roms/" + rom + ".bin"

    @staticmethod
    def list_all_roms():
        return [
            f for f in listdir(ALEEnvironment.get_rom_path())
            if isfile(join(ALEEnvironment.get_rom_path(), f))
        ]

    def get_state_space(self):
        if self.__processor is None:
            shape = self.__current_buffer.shape
        else:
            shape = self.__processor.process(self.__current_buffer).shape
        min_value = np.zeros(shape, dtype=np.uint8)
        max_value = np.full(shape, 255)
        return Space(min_value, max_value, True)

    def get_action_space(self):
        if self.__action_reduction >= 1:
            return Space(0, self.__action_reduction - 1, True)
        else:
            return Space(0, len(self.__action_set) - 1, True)

    def step(self, act):
        if isinstance(act, (list, np.ndarray)):
            if len(act) <= 0:
                raise ValueError('Empty action list !')
            act = act[0]
        self.__current_steps += 1
        act = self.__action_set[act]
        rew = self._step(act)
        return rew

    def get_current_steps(self):
        return self.__current_steps

    def is_atari(self):
        return True

    def is_render(self):
        return self.__is_render

    def get_number_of_objectives(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_objectives()

    def get_number_of_agents(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_agents()

    def get_state_processor(self):
        return self.__processor

コード例 #31

0

ファイルを表示

ファイル: test.py プロジェクト: prabhatnagarajan/dqn

def test(session,
         hist_len=4,
         discount=0.99,
         act_rpt=4,
         upd_freq=4,
         min_sq_grad=0.01,
         epsilon=TEST_EPSILON,
         no_op_max=30,
         num_tests=30,
         learning_rate=0.00025,
         momentum=0.95,
         sq_momentum=0.95):
    #Create ALE object
    if len(sys.argv) < 2:
        print 'Usage:', sys.argv[0], 'rom_file'
        sys.exit()

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt('random_seed', 123)
    #Changes repeat action probability from default of 0.25
    ale.setFloat('repeat_action_probability', 0.0)
    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(sys.argv[1])

    # create DQN agent
    # learning_rate and momentum are unused parameters (but needed)
    agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum,
                hist_len, len(ale.getMinimalActionSet()), None, discount,
                rom_name(sys.argv[1]))

    #Store the most recent two images
    preprocess_stack = deque([], 2)

    num_episodes = 0
    while num_episodes < num_tests:
        #initialize sequence with initial image
        seq = list()
        perform_no_ops(ale, no_op_max, preprocess_stack, seq)
        total_reward = 0
        while not ale.game_over():
            state = get_state(seq, hist_len)
            action = agent.get_action_best_network(state, epsilon)
            #skip frames by repeating action
            reward = 0
            for i in range(act_rpt):
                reward = reward + ale.act(action)
                preprocess_stack.append(ale.getScreenRGB())
            seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))
            total_reward += reward
        print('Episode ended with score: %d' % (total_reward))
        num_episodes = num_episodes + 1
        ale.reset_game()

コード例 #32

0

ファイルを表示

ファイル: agent.py プロジェクト: JCass45/FYP-Deep-Reinforcement-Learning-on-Atari-2600

class Agent():
    def __init__(self, game, agent_type, display, load_model, record, test):
        self.name = game
        self.agent_type = agent_type
        self.ale = ALEInterface()
        self.ale.setInt(str.encode('random_seed'), np.random.randint(100))
        self.ale.setBool(str.encode('display_screen'), display or record)
        if record:
            self.ale.setString(str.encode('record_screen_dir'), str.encode('./data/recordings/{}/{}/tmp/'.format(game, agent_type)))

        self.ale.loadROM(str.encode('./roms/{}.bin'.format(self.name)))
        self.action_list = list(self.ale.getMinimalActionSet())
        self.frame_shape = np.squeeze(self.ale.getScreenGrayscale()).shape
        if test:
            self.name += '_test'

        if 'space_invaders' in self.name:
            # Account for blinking bullets
            self.frameskip = 2
        else:
            self.frameskip = 3

        self.frame_buffer = deque(maxlen=4)
        if load_model and not record:
            self.load_replaymemory()
        else:
            self.replay_memory = ReplayMemory(500000, 32)

        model_input_shape = self.frame_shape + (4,)
        model_output_shape = len(self.action_list)

        if agent_type == 'dqn':
            self.model = DeepQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )
        elif agent_type == 'double':
            self.model = DoubleDQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )

        else:
            self.model = DuelingDQN(
                model_input_shape,
                model_output_shape,
                self.action_list,
                self.replay_memory,
                self.name,
                load_model
            )

        print('{} Loaded!'.format(' '.join(self.name.split('_')).title()))
        print('Displaying: ', display)
        print('Frame Shape: ', self.frame_shape)
        print('Frame Skip: ', self.frameskip)
        print('Action Set: ', self.action_list)
        print('Model Input Shape: ', model_input_shape)
        print('Model Output Shape: ', model_output_shape)
        print('Agent: ', agent_type)

    def training(self, steps):
        '''
        Trains the agent for :steps number of weight updates.

        Returns the average model loss
        '''

        loss = []

        # Initialize frame buffer. np.squeeze removes empty dimensions e.g. if shape=(210,160,__)
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))

        try:
            for step in range(steps):
                gameover = False
                initial_state = np.stack(self.frame_buffer, axis=-1)
                action = self.model.predict_action(initial_state)

                # Backup data
                if step % 5000 == 0:
                    self.model.save_model()
                    self.model.save_hyperparams()
                    self.save_replaymemory()

                # If using a target model check for weight updates
                if hasattr(self.model, 'tau'):
                    if self.model.tau == 0:
                        self.model.update_target_model()
                        self.model.tau = 10000
                    else:
                        self.model.tau -= 1

                # Frame skipping technique https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/
                lives_before = self.ale.lives()
                for _ in range(self.frameskip):
                    self.ale.act(action)

                reward = self.ale.act(action)
                self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
                lives_after = self.ale.lives()

                if lives_after < lives_before:
                    gameover = True  # Taking advice from dude on reddit
                    reward = -1

                if self.ale.game_over():
                    gameover = True
                    reward = -1
                    self.ale.reset_game()

                new_state = np.stack(self.frame_buffer, axis=-1)

                # Experiment with clipping rewards for stability purposes
                reward = np.clip(reward, -1, 1)
                self.replay_memory.add(
                    initial_state,
                    action,
                    reward,
                    gameover,
                    new_state
                )

                loss += self.model.replay_train()
        except:
            self.model.save_model()
            self.model.save_hyperparams()
            self.save_replaymemory()
            raise KeyboardInterrupt

        return np.mean(loss, axis=0)

    def simulate_random(self):
        print('Simulating game randomly')
        done = False
        total_reward = 0
        while not done:
            action = np.random.choice(self.ale.getMinimalActionSet())
            reward = self.ale.act(action)
            total_reward += reward
            if self.ale.game_over():
                reward = -1
                done = True

            reward = np.clip(reward, -1, 1)
            if reward != 0:
                print(reward)

        frames_survived = self.ale.getEpisodeFrameNumber()
        self.ale.reset_game()
        return total_reward, frames_survived

    def simulate_intelligent(self, evaluating=False):
        done = False
        total_score = 0

        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
        while not done:
            state = np.stack(self.frame_buffer, axis=-1)
            action = self.model.predict_action(state, evaluating)

            for _ in range(self.frameskip):
                self.ale.act(action)

            # Remember, ale.act returns the increase in game score with this action
            total_score += self.ale.act(action)

            # Pushes oldest frame out
            self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale()))
            if self.ale.game_over():
                done = True

        frames_survived = self.ale.getEpisodeFrameNumber()
        print('   Game Over')
        print('   Frames Survived: ', frames_survived)
        print('   Score: ', total_score)
        print('===========================')
        self.ale.reset_game()
        return total_score, frames_survived

    def save_replaymemory(self):
        with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'wb') as f:
            pickle.dump(self.replay_memory, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Saved replay memory at ', datetime.now())

    def load_replaymemory(self):
        try:
            with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'rb') as f:
                self.replay_memory = pickle.load(f)
                print('Loaded replay memory at ', datetime.now())
        except FileNotFoundError:
            print('No replay memory file found')
            raise KeyboardInterrupt

コード例 #33

0

ファイルを表示

ale = ALEInterface()
ale.setInt('random_seed', 123)
ale.setInt("frame_skip", frameSkip)
USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame

        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
    ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()

n_senses = 82 * 72
n_actions = len(legal_actions)
temporal_window = 1
hiddenSize1 = 256
hiddenSize2 = 32
network_size = n_senses * (temporal_window) + n_actions * (temporal_window - 1)
memorySize = 1000000
maxEpisode = 1000000
maxFrame = 50000000
frameCount = 0

startLearningFrame = 10000
explorationRate = 1.0
finalExplorationFrame = 20000

コード例 #34

0

ファイルを表示

ファイル: run_dqn.py プロジェクト: Mog333/DeepRL

def run_experiment(args):
    parameters = Parameters.processArguments(args, __doc__)

    #if the nnFile is a directory, check for a previous experiment run in it and start from there
    #load its parameters, append to its evalresults file, open its largest network file
    #If its none, create a experiment directory. create a results file, save parameters, save network files here.

    experimentDirectory = parameters.rom + "_" + time.strftime(
        "%d-%m-%Y-%H-%M") + "/"
    resultsFileName = experimentDirectory + "results.csv"
    startingEpoch = 1
    if parameters.nnFile is None or parameters.nnFile.endswith(".pkl"):
        #Create your experiment directory, results file, save parameters
        if not os.path.isdir(experimentDirectory):
            os.mkdir(experimentDirectory)

        resultsFile = open(resultsFileName, "a")
        resultsFile.write("Epoch,\tAverageReward,\tMean Q Value\n")
        resultsFile.close()

        parametersFile = open(experimentDirectory + "parameters.pkl", 'wb', -1)
        cPickle.dump(parameters, parametersFile)
        parametersFile.close()

    if parameters.nnFile is not None and os.path.isdir(parameters.nnFile):
        #Found a experiment directory
        if not parameters.nnFile.endswith("/"):
            parameters.nnFile += "/"

        experimentDirectory = parameters.nnFile
        resultsFileName = experimentDirectory + "results.csv"

        if os.path.exists(experimentDirectory + "parameters.pkl"):
            parametersFile = open(experimentDirectory + "parameters.pkl", 'rb')
            parameters = cPickle.load(parametersFile)
            parametersFile.close()
        else:
            parametersFile = open(experimentDirectory + "parameters.pkl", 'wb',
                                  -1)
            cPickle.dump(parameters, parametersFile)
            parametersFile.close()

        contents = os.listdir(experimentDirectory)
        networkFiles = []
        for handle in contents:
            if handle.startswith("network") and handle.endswith(".pkl"):
                networkFiles.append(handle)

        if len(networkFiles) == 0:
            #Found a premature experiment, didnt finish a single training epoch
            parameters.nnFile = None
        else:
            #Found a previous experiments network files, now find the highest epoch number
            highestNNFile = networkFiles[0]
            highestNetworkEpochNumber = int(
                highestNNFile[highestNNFile.index("_") +
                              1:highestNNFile.index(".")])
            for networkFile in networkFiles:
                networkEpochNumber = int(networkFile[networkFile.index("_") +
                                                     1:networkFile.index(".")])
                if networkEpochNumber > highestNetworkEpochNumber:
                    highestNNFile = networkFile
                    highestNetworkEpochNumber = networkEpochNumber

            startingEpoch = highestNetworkEpochNumber + 1
            #dont use full exploration, its not a good way to fill the replay memory when we already have a decent policy
            if startingEpoch > 1:
                parameters.epsilonStart = parameters.epsilonEnd

            parameters.nnFile = experimentDirectory + highestNNFile
            print "Loaded experiment: " + experimentDirectory + "\nLoaded network file:" + highestNNFile

    sys.setrecursionlimit(10000)
    ale = ALEInterface()

    Environment.initializeALEParameters(ale, parameters.seed,
                                        parameters.frameSkip,
                                        parameters.repeatActionProbability,
                                        parameters.displayScreen)
    ale.loadROM(parameters.fullRomPath)
    minimalActions = ale.getMinimalActionSet()

    agent = DQNAgent.DQNAgent(
        minimalActions, parameters.croppedHeight, parameters.croppedWidth,
        parameters.batchSize, parameters.phiLength, parameters.nnFile,
        parameters.loadWeightsFlipped, parameters.updateFrequency,
        parameters.replayMemorySize, parameters.replayStartSize,
        parameters.networkType, parameters.updateRule,
        parameters.batchAccumulator, parameters.networkUpdateDelay,
        parameters.discountRate, parameters.learningRate, parameters.rmsRho,
        parameters.rmsEpsilon, parameters.momentum, parameters.epsilonStart,
        parameters.epsilonEnd, parameters.epsilonDecaySteps,
        parameters.evalEpsilon, parameters.useSARSAUpdate,
        parameters.kReturnLength)

    for epoch in xrange(startingEpoch, parameters.epochs + 1):
        agent.startTrainingEpoch(epoch)
        runTrainingEpoch(ale, agent, epoch, parameters.stepsPerEpoch)
        agent.endTrainingEpoch(epoch)

        networkFileName = experimentDirectory + "network_" + str(
            epoch) + ".pkl"
        DeepNetworks.saveNetworkParams(agent.network.qValueNetwork,
                                       networkFileName)

        if parameters.stepsPerTest > 0 and epoch % parameters.evaluationFrequency == 0:
            agent.startEvaluationEpoch(epoch)
            avgReward = runEvaluationEpoch(ale, agent, epoch,
                                           parameters.stepsPerTest)
            holdoutQVals = agent.computeHoldoutQValues(3200)

            resultsFile = open(resultsFileName, 'a')
            resultsFile.write(
                str(epoch) + ",\t" + str(round(avgReward, 4)) + ",\t\t" +
                str(round(holdoutQVals, 4)) + "\n")
            resultsFile.close()

            agent.endEvaluationEpoch(epoch)

    agent.agentCleanup()

コード例 #35

0

ファイルを表示

ファイル: game_manager.py プロジェクト: NTAWolf/pyagents

class GameManager(object):
    """This class takes care of the interactions between an agent and
    a game across episodes, as well as overall logging of performance.
    """

    def __init__(
        self,
        game_name,
        agent,
        results_dir,
        n_epochs=1,
        n_episodes=None,
        n_frames=None,
        remove_old_results_dir=False,
        use_minimal_action_set=True,
        min_time_between_frames=0,
    ):
        """game_name is one of the supported games (there are many), as a string: "space_invaders.bin"
        agent is an an instance of a subclass of the Agent interface
        results_dir is a string representing a directory in which results and logs are placed
            If it does not exist, it is created.
        use_minimal_action_set determines whether the agent is offered all possible actions,
            or only those (minimal) that are applicable to the specific game.
        min_time_between_frames is the minimum required time in seconds between
            frames. If 0, the game is unrestricted.
        """
        self.game_name = game_name
        self.agent = agent
        self.use_minimal_action_set = use_minimal_action_set
        self.min_time_between_frames = min_time_between_frames
        self.n_epochs = n_epochs
        self.n_episodes = n_episodes
        self.n_frames = n_frames

        if (n_episodes is None and n_frames is None) or (n_episodes is not None and n_frames is not None):
            raise ValueError("Extacly one of n_episodes and n_frames " "must be defined")

        self.initialize_results_dir(results_dir, remove_old_results_dir)

        self.log = util.logging.Logger(
            ("settings", "step", "episode", "epoch", "overall"),
            "settings",
            os.path.join(self.results_dir, "GameManager.log"),
        )

        self.stats = util.logging.CSVLogger(
            os.path.join(self.results_dir, "stats.log"),
            header="epoch,episode,total_reward,n_frames,wall_time",
            print_items=True,
        )

        self._object_cache = dict()

        self.initialize_ale()
        self.initialize_agent()

        self.dump_settings()

    def initialize_results_dir(self, results_dir, remove_existing=False):
        """Creates the whole path of directories if they do no exist.
        If they do exist, raises an error unless remove_existing is True,
        in which case the existing directory is deleted.
        """
        now = datetime.now().strftime("%Y%m%d-%H-%M")
        # drop .bin, append current time down to the minute
        results_dir = os.path.join(results_dir, self.game_name[:-4] + now)

        if remove_existing:
            if os.path.exists(results_dir):
                shutil.rmtree(results_dir)
        # Should raise an error if directory exists
        os.makedirs(results_dir)

        self.results_dir = results_dir

    def initialize_ale(self):
        self.ale = ALEInterface()
        self.ale.loadROM(os.path.join(ROM_RELATIVE_LOCATION, self.game_name))

    def initialize_agent(self):
        RSC = namedtuple("RawStateCallbacks", ["raw", "grey", "rgb", "ram"])
        raw_state_callbacks = RSC(self.get_screen, self.get_screen_grayscale, self.get_screen_RGB, self.get_RAM)

        self.agent.set_raw_state_callbacks(raw_state_callbacks)
        self.agent.set_results_dir(self.results_dir)

        if self.use_minimal_action_set:
            actions = self.ale.getMinimalActionSet()
        else:
            actions = self.ale.getLegalActionSet()

        self.agent.set_available_actions(actions)

    def rest(self, already_elapsed):
        rest_time = self.min_time_between_frames - already_elapsed
        if rest_time > 0:
            sleep(rest_time)

    def run(self):
        """Runs self.n_epochs epochs, where the agent's learning is
        reset for each new epoch.
        Each epoch lasts self.n_episodes or self.n_frames, whichever is 
            defined.
        """
        self.log.overall("Starting run")
        run_start = time()
        for epoch in xrange(self.n_epochs):
            self.agent.reset()
            self.n_epoch = epoch
            self._run_epoch()
        self.log.overall("End of run ({:.2f} s)".format(time() - run_start))

    def _run_epoch(self):
        self.n_episode = 0

        start = time()
        while not self._stop_condition_met():
            self._run_episode()
            self.n_episode += 1
        wall_time = time() - start
        frames = self.ale.getFrameNumber()

        self.log.epoch("Finished epoch after {:.2f} seconds".format(wall_time))

    def _run_episode(self):
        self.ale.reset_game()
        self.agent.on_episode_start()

        total_reward = 0
        episode_start = time()

        while (not self.ale.game_over()) and (not self._stop_condition_met()):
            timestep_start = time()

            action = self.agent.select_action()
            reward = self.ale.act(action)
            self.agent.receive_reward(reward)

            total_reward += reward

            self.rest(time() - timestep_start)

        wall_time = time() - episode_start
        self.agent.on_episode_end()

        # Stats format: CSV with epoch, episode, total_reward, n_frames, wall_time
        self.stats.write(
            self.n_epoch, self.n_episode, total_reward, self.ale.getEpisodeFrameNumber(), "{:.2f}".format(wall_time)
        )

    def _stop_condition_met(self):
        if self.n_episodes:
            return self.n_episode >= self.n_episodes
        return self.ale.getFrameNumber() >= self.n_frames

    # Methods for state perception
    def get_screen(self):
        """Returns a matrix containing the current game screen in raw pixel data,
        i.e. before conversion to RGB. Handles reuse of np.array object, so it 
        will overwrite what is in the old object"""
        return self._cached("raw", self.ale.getScreen)

    def get_screen_grayscale(self):
        """Returns an np.array with the screen grayscale colours. 
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object.
        """
        return self._cached("gray", self.ale.getScreenGrayscale)

    def get_screen_RGB(self):
        """Returns a numpy array with the screen's RGB colours. 
        The first positions contain the red colours, followed by
        the green colours and then the blue colours"""
        return self._cached("rgb", self.ale.getScreenRGB)

    def get_RAM(self):
        """Returns a vector containing current RAM content (byte-level).
        Handles reuse of np.array object, so it will overwrite what 
        is in the old object"""
        return self._cached("ram", self.ale.getRAM)

    def _cached(self, key, func):
        if key in self._object_cache:
            func(self._object_cache[key])
        else:
            self._object_cache[key] = func()

        return self._object_cache[key]

    def dump_settings(self):
        import json

        settings = self.get_settings()
        path = os.path.join(self.results_dir, "settings")
        with open(path, "w") as f:
            json.dump(settings, f, indent=4)

    def get_settings(self):
        """Returns a dict representing the settings needed to 
        reproduce this object and its subobjects
        """
        return {
            "game_name": self.game_name,
            "n_epochs": self.n_epochs,
            "n_episodes": self.n_episodes,
            "n_frames": self.n_frames,
            "agent": self.agent.get_settings(),
            "results_dir": self.results_dir,
            "use_minimal_action_set": self.use_minimal_action_set,
        }

コード例 #36

0

ファイルを表示

ファイル: atari.py プロジェクト: zlczlcgithub/tensorpack

class AtariPlayer(gym.Env):
    """
    A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is Over
    """
    def __init__(self,
                 rom_file,
                 viz=0,
                 frame_skip=4,
                 nullop_start=30,
                 live_lost_as_eoe=True,
                 max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with random number of null ops.
            live_losts_as_eoe: consider lost of lives as end of episode. Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)

            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.height, self.width),
                                            dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives

        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info

コード例 #37

0

ファイルを表示

class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.life_lost = False

    def numActions(self):
        return len(self.actions)

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test' or not self.life_lost
                or  # `reset` called in a middle of episode
                self.ale.game_over()  # all lives are lost
            ):
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

コード例 #38

0

ファイルを表示

class AtariEnvironment:
    """
	Environment for playing Atari games using ALE Interface
	"""
    def __init__(self, game_path, **kwargs):
        """
		Create an environment with the provided game
		"""

        # Optional parameters
        self.screen_size = kwargs.get('screen_size', (84, 84))
        #		self.random_seed = kwargs.get('seed', 123)
        self.random_seed = kwargs.get('seed', 0)

        # Buffer for grabbing the screen from ALE
        self.screen_buffer = np.zeros((100800, ), np.uint8)

        # Create the ALE interface and load the game
        self.ale = ALEInterface()
        self.ale.setBool('color_averaging', True)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setInt('random_seed', self.random_seed)
        self.ale.loadROM(game_path)

        # Grab the set of available moves for this game
        self.move_list = self.ale.getMinimalActionSet()
        self.num_actions = len(self.move_list)
        print "Number of Actions:", self.num_actions

        self.listeners = []

        self.screen = None
#		self.screen = pygame.display.set_mode((160,210))

    def get_state(self):
        """
		Convert current screen to 84x84 np array of luminescence values. 
		"""

        self.ale.getScreenRGB(self.screen_buffer)

        # Reshape the screen buffer to an appropriate shape
        game_screen = self.screen_buffer.reshape((210, 160, 3))

        # Convert to luminosity and scale to the desired screen size
        gray_screen = np.dot(game_screen, np.array([0.299, 0.587,
                                                    0.114])).astype(np.uint8)
        gray_screen = ndimage.zoom(gray_screen, (0.4, 0.525))

        return gray_screen

    def act(self, action):
        """
		Perform an action on the environment
		"""

        ale_action = self.move_list[action]

        return self.ale.act(ale_action)

    def terminal(self):
        """
		Return if the state is a terminal state
		"""

        return self.ale.game_over()

    def lives(self):
        """
		How many lives are left
		"""

        return self.ale.lives()

    def reset_game(self):
        """
		"""

        self.ale.reset_game()

    def display(self):
        """
		"""

        game_screen = self.screen_buffer.reshape((210, 160, 3))

        game_surf = pygame.surfarray.make_surface(game_screen)
        game_surf = pygame.transform.rotate(game_surf, -90)
        self.screen.blit(game_surf, (0, 0))
        pygame.display.flip()

コード例 #39

0

ファイルを表示

ファイル: main.py プロジェクト: scientist1642/tensorflow-rl

def get_num_actions(rom_path, rom_name):
    from ale_python_interface import ALEInterface
    filename = '{0}/{1}.bin'.format(rom_path, rom_name)
    ale = ALEInterface()
    ale.loadROM(filename)
    return len(ale.getMinimalActionSet())

コード例 #40

0

ファイルを表示

regressor.fit(np.random.randn(1601, 1), append([1.], np.zeros((1600, 1))))


# regressor = skflow.TensorFlowEstimator.restore('./regressor')
def Q(s, a):
    return regressor.predict(append(s, a))


def detectState(ale):
    return cv2.resize(ale.getScreenGrayscale(), (40, 40))


while True:
    ale = ALEInterface()
    ale.loadROM("breakout.bin")
    actionSet = ale.getMinimalActionSet()
    while not ale.game_over():
        if sys.argv[1] == 'disp':
            cv2.imshow('', cv2.resize(ale.getScreenRGB(), (600, 600)))
            cv2.waitKey(1)
        s = detectState(ale)
        qvals = []
        for action in actionSet:
            qvals.append(Q(s, action)[0])
        a = actionSet[qvals.index(max(qvals))]
        X = append(s, a)
        r = ale.act(a)
        s_ = detectState(ale)
        qvals = []
        for action in actionSet:
            qvals.append(Q(s_, action)[0])

コード例 #41

0

ファイルを表示

class AleInterface(object):
    def __init__(self, game, args):

        #self.game = game

        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        #

        self.ale.setBool('display_screen', args.display_screen)
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)

        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        self.actions = self.ale.getMinimalActionSet()
        self.actionsB = self.ale.getMinimalActionSetB()

    def get_actions_num(self):
        return len(self.actions)

    def get_actions_numB(self):
        return len(self.actionsB)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def actAB(self, actionA, actionB):
        reward = self.ale.actAB(self.actions[actionA],
                                self.actionsB[actionB - 18])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()

    def set_mode(self, mode):
        return self.ale.setMode(mode)

コード例 #42

0

ファイルを表示

ファイル: ALEEnvironment.py プロジェクト: tianhan4/hh-DQN

class ALEEnvironment():
    def __init__(self, config):
        self.history = History3D(config)
        self.history_length = config.history_length
        self.mode = config.mode
        self.life_lost = False
        self.terminal = False
        self.score = 0
        #cv2.namedWindow("Image")

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if config.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', False)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip',
                        config.frame_skip)  # Whether skip frames or not
        self.ale.setBool('color_averaging', config.color_averaging)

        if config.random_seed:  # Random seed for repeatable experiments.
            self.ale.setInt('random_seed', config.random_seed)

        if config.record_screen_path:
            if not os.path.exists(config.record_screen_path):
                os.makedirs(config.record_screen_path)
            self.ale.setString('record_screen_dir', config.record_screen_path)

        if config.record_sound_filename:
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               config.record_sound_filename)

        self.ale.loadROM(config.rom_file)

        if config.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.screen_width = config.screen_width
        self.screen_height = config.screen_height

    def numActions(self):
        return len(self.actions)

    def new_game(self):
        state, terminal = self.reset()
        for _ in range(self.history_length + 1):
            self.history.add(state)
        return state, terminal, list(range(len(self.actions)))

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test' or not self.life_lost or self.ale.game_over()):
            # `reset` called in a middle of episode  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        return self.getScreen(), self.isTerminal()

    def step(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        self.score += reward
        self.current_state = self.getScreen()
        self.history.add(self.current_state)
        self.terminal = self.isTerminal()
        return reward, self.history.get(), self.terminal

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        #print 'screen:\n',type(screen)
        #print 'screen.shape',screen.shape
        resized = cv2.resize(screen / 255.,
                             (self.screen_width, self.screen_height))

        #cv2.imshow("Image", screen)
        '''
		cv2.namedWindow("Image")
		cv2.destroyAllWindows()
		'''
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

コード例 #43

0

ファイルを表示

class AtariEmulator:
    def __init__(self, args):
        ''' Initialize Atari environment '''

        # Parameters
        self.buffer_length = args.buffer_length
        self.screen_dims = args.screen_dims
        self.frame_skip = args.frame_skip
        self.blend_method = args.blend_method
        self.reward_processing = args.reward_processing
        self.max_start_wait = args.max_start_wait
        self.history_length = args.history_length
        self.start_frames_needed = self.buffer_length - 1 + (
            (args.history_length - 1) * self.frame_skip)

        #Initialize ALE instance
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        if args.watch:
            self.ale.setBool(b'sound', True)
            self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

        self.buffer = np.empty((self.buffer_length, 210, 160))
        self.current = 0
        self.action_set = self.ale.getMinimalActionSet()
        self.lives = self.ale.lives()

        self.reset()

    def get_possible_actions(self):
        ''' Return list of possible actions for game '''
        return self.action_set

    def get_screen(self):
        ''' Add screen to frame buffer '''
        self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
        self.current = (self.current + 1) % self.buffer_length

    def reset(self):
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if self.max_start_wait < 0:
            print("ERROR: max start wait decreased beyond 0")
            sys.exit()
        elif self.max_start_wait <= self.start_frames_needed:
            wait = 0
        else:
            wait = random.randint(
                0, self.max_start_wait - self.start_frames_needed)
        for _ in range(wait):
            self.ale.act(self.action_set[0])

        # Fill frame buffer
        self.get_screen()
        for _ in range(self.buffer_length - 1):
            self.ale.act(self.action_set[0])
            self.get_screen()
        # get initial_states
        state = [(self.preprocess(), 0, 0, False)]
        for step in range(self.history_length - 1):
            state.append(self.run_step(0))

        # make sure agent hasn't died yet
        if self.isTerminal():
            print(
                "Agent lost during start wait.  Decreasing max_start_wait by 1"
            )
            self.max_start_wait -= 1
            return self.reset()

        return state

    def run_step(self, action):
        ''' Apply action to game and return next screen and reward '''

        raw_reward = 0
        for step in range(self.frame_skip):
            raw_reward += self.ale.act(self.action_set[action])
            self.get_screen()

        reward = None
        if self.reward_processing == 'clip':
            reward = np.clip(raw_reward, -1, 1)
        else:
            reward = raw_reward

        terminal = self.isTerminal()
        self.lives = self.ale.lives()

        return (self.preprocess(), action, reward, terminal, raw_reward)

    def preprocess(self):
        ''' Preprocess frame for agent '''

        img = None

        if self.blend_method == "max":
            img = np.amax(self.buffer, axis=0)

        return imresize(img, self.screen_dims)

    def isTerminal(self):
        return (self.isGameOver() or (self.lives > self.ale.lives()))

    def isGameOver(self):
        return self.ale.game_over()

コード例 #44

0

ファイルを表示

ファイル: atari_environment.py プロジェクト: csdlrl/dqn-atari

class AtariEnvironment:
    
    def __init__(self, args, outputDir):
        
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq
        
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state
    
    def getGameNumber(self):
        return self.gameNumber
    
    def getFrameNumber(self):
        return self.ale.getFrameNumber()
    
    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()
    
    def getEpisodeStepNumber(self):
        return self.episodeStepNumber
    
    def getStepNumber(self):
        return self.stepNumber
    
    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1
        
        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()
    
            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' % (self.getEpisodeFrameNumber()))


        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen, self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(), self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0 # environment steps vs ALE frames.  Will probably be 4*frame number

コード例 #45

0

ファイルを表示

ファイル: ALE_env.py プロジェクト: bigcapitalist/deer

class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4, 
                 ale_options=[{"key": "random_seed", "value": 0}, 
                              {"key": "color_averaging", "value": True},
                              {"key": "repeat_action_probability", "value": 0.}]):
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError("Option {} ({}) is not an int, bool or float.".format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ale.reset_game()
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [84 * [84 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break
            
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._modeScore / self._modeEpisodeCount, self._modeEpisodeCount))


    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()

コード例 #46

0

ファイルを表示

def create_environment():
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(BIN))
    num_actions = len(ale_int.getMinimalActionSet())
    return AtariEmulator(BIN), num_actions

コード例 #47

0

ファイルを表示

ファイル: atari_emulator.py プロジェクト: aleksanw/skynet

class AtariEmulator(BaseEnvironment):
    def __init__(self, actor_id, args):
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", args.random_seed * (actor_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = args.rom_path + "/" + args.game + ".bin"
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = args.random_start
        self.single_life_episodes = args.single_life_episodes
        self.call_on_new_frame = args.visualize

        # Processed historcal frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width),
                     dtype=np.uint8), self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):
        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """

        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def reset(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(NR_IMAGES):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def step(self, action):
        """ Get the next state, reward, and game over signal """

        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal, None

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]

    def reset_with_noops(self, noops=0):
        observation = self.reset()
        if noops != 0:
            for _ in range(random.randint(0, noops)):
                observation, _, _ = self.next(self.get_noop())
        return observation

コード例 #48

0

ファイルを表示

ファイル: train_nnet_agent.py プロジェクト: switchfootsid/playing_atari

def train(gamepath, n_episodes,  display_screen,  record_weights,  reduce_exploration_prob_amount, n_frames_to_skip, exploration_prob, verbose, discount, learning_rate, load_weights, frozen_target_update_period, use_replay_mem):
    """
    :description: trains an agent to play a game 

    :type gamepath: string 
    :param gamepath: path to the binary of the game to be played

    :type n_episodes: int 
    :param n_episodes: number of episodes of the game on which to train

    display_screen : whether or not to display the screen of the game 
    
    record_weights : whether or not to save the weights of the nextwork
    
    reduce_exploration_prob_amount : amount to reduce exploration prob each episode
                                     to not reduce exploration_prob set to 0
    
    n_frames_to_skip : how frequently to determine a new action to use
    
    exploration_prob : probability of choosing a random action
    
    verbose : whether or not to print information about the run periodically
    
    discount : discount factor used in learning 
    
    learning_rate : the scaling factor for the sgd update
    
    load_weights : whether or not to load weights for the network (set the files directly below)
    
    frozen_target_update_period : the number of episodes between reseting the target of the network
    """

    # load the ale interface to interact with
    ale = ALEInterface()
    ale.setInt('random_seed', 42)

    # display/recording settings, doesn't seem to work currently
    recordings_dir = './recordings/breakout/'
    # previously "USE_SDL"
    if display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir);
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(gamepath)
    ale.setInt("frame_skip", n_frames_to_skip)
    # real actions for breakout are [0,1,3,4]
    real_actions = ale.getMinimalActionSet()

    # use a list of actions [0,1,2,3] to index into the array of real actions
    actions = np.arange(len(real_actions))

    # these theano variables are used to define the symbolic input of the network
    features = T.dvector('features')
    action = T.lscalar('action')
    reward = T.dscalar('reward')
    next_features = T.dvector('next_features')

    # load weights by file name
    # currently must be loaded by individual hidden layers
    if load_weights:
        hidden_layer_1 = file_utils.load_model('weights/hidden0_replay.pkl')
        hidden_layer_2 = file_utils.load_model('weights/hidden1_replay.pkl')
    else:
        # defining the hidden layer network structure
        # the n_hid of a prior layer must equal the n_vis of a subsequent layer
        # for q-learning the output layer must be of len(actions)
        hidden_layer_1 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, 
            n_hid=NNET_INPUT_DIMENSION, layer_name='hidden1', activation='relu')
        hidden_layer_2 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, 
            n_hid=NNET_INPUT_DIMENSION, layer_name='hidden2', activation='relu')
    hidden_layer_3 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, 
            n_hid=len(actions), layer_name='hidden3', activation='relu') 
    # the output layer is currently necessary when using tanh units in the
    # hidden layer in order to prevent a theano warning
    # currently the relu unit setting of the hidden and output layers is leaky w/ alpha=0.01
    output_layer = OutputLayer(layer_name='output', activation='relu')

    # pass a list of layers to the constructor of the network (here called "mlp")
    layers = [hidden_layer_1, hidden_layer_2, hidden_layer_3, output_layer]
    qnetwork = QNetwork(layers, discount=discount, learning_rate=learning_rate)

    # this call gets the symbolic output of the network
    # along with the parameter updates expected
    loss, updates = qnetwork.get_loss_and_updates(features, action, reward, next_features)

    # this defines the theano symbolic function used to train the network
    # 1st argument is a list of inputs, here the symbolic variables above
    # 2nd argument is the symbolic output expected
    # 3rd argument is the dictionary of parameter updates
    # 4th argument is the compilation mode
    train_model = theano.function(
                    [theano.Param(features, default=np.zeros(NNET_INPUT_DIMENSION)),
                    theano.Param(action, default=0),
                    theano.Param(reward, default=0),
                    theano.Param(next_features, default=np.zeros(NNET_INPUT_DIMENSION))],
                    outputs=loss,
                    updates=updates,
                    mode='FAST_RUN')

    sym_action = qnetwork.get_action(features)
    get_action = theano.function([features], sym_action)

    # some containers for collecting information about the training processes 
    rewards = []
    losses = []
    best_reward = 4
    sequence_examples = []
    sampled_examples = []

    # the preprocessor and feature extractor to use
    preprocessor = screen_utils.RGBScreenPreprocessor()
    feature_extractor = feature_extractors.NNetOpenCVBoundingBoxExtractor(max_features=MAX_FEATURES)

    if use_replay_mem:
        replay_mem = ReplayMemory()
    # main training loop, each episode is a full playthrough of the game
    for episode in xrange(n_episodes):

        # this implements the frozen target component of the network
        # by setting the frozen layers of the network to a copy of the current layers
        if episode % frozen_target_update_period == 0:
            qnetwork.frozen_layers = copy.deepcopy(qnetwork.layers)


        # some variables for collecting information about this particular run of the game
        total_reward = 0
        action = 1
        counter = 0
        reward = 0
        loss = 0
        previous_param_0 = None

        # lives here is used for the reward heuristic of subtracting 1 from the reward 
        # when we lose a life. currently commented out this functionality because
        # i think it might not be helpful.
        lives = ale.lives()

        # the initial state of the screen and state
        screen = np.zeros((preprocessor.dim, preprocessor.dim, preprocessor.channels))
        state = { "screen" : screen, "objects" : None, "prev_objects": None, "features": np.zeros(MAX_FEATURES)}
        
        # start the actual play through of the game
        while not ale.game_over():
            counter += 1

            # get the current features, which is the representation of the state provided to 
            # the "agent" (here just the network directly)
            features = state["features"]

            # epsilon greedy action selection (note that exploration_prob is reduced by
            # reduce_exploration_prob_amount after every game)
            if random.random() < exploration_prob: 
                action = random.choice(actions)
            else:
                # to choose an action from the network, we fprop 
                # the current state and take the argmax of the output
                # layer (i.e., the action that corresponds to the 
                # maximum q value)
                action = get_action(features)

            # take the action and receive the reward
            reward += ale.act(real_actions[action])

            # this is commented out because i think it might not be helpful
            if ale.lives() < lives: 
                 lives = ale.lives()
                 reward -= 1


            # get the next screen, preprocess it, initialize the next state
            next_screen = ale.getScreenRGB()
            next_screen = preprocessor.preprocess(next_screen)
            next_state = {"screen": next_screen, "objects": None, "prev_objects": state["objects"]}

            # get the features for the next state
            next_features = feature_extractor(next_state, action=None)

            if use_replay_mem:
                sars_tuple = (features, action, reward, next_features)
                replay_mem.store(sars_tuple)
                num_samples = 5 if replay_mem.isFull() else 1
                for i in range(0, num_samples):
                    random_train_tuple = replay_mem.sample()
                    loss += train_model(*random_train_tuple)

                # collect for pca
                sequence_examples.append(list(sars_tuple[0]) + [sars_tuple[1]] \
                         + [sars_tuple[2]] + sars_tuple[3])
                sequence_examples = sequence_examples[-100:]
                sampled_examples.append(list(random_train_tuple[0]) + [random_train_tuple[1]] \
                        + [random_train_tuple[2]] + random_train_tuple[3])
                sampled_examples = sampled_examples[-100:]
            else:
                # call the train model function
                loss += train_model(features, action, reward, next_features)
            # prepare for the next loop through the game
            next_state["features"] = next_features
            state = next_state
                
            # weird counter value to avoid interaction with any other counter
            # loop that might be added, not necessary right now
            if verbose and counter % PRINT_TRAINING_INFO_PERIOD == 0:
                print('*' * 15 + ' training information ' + '*' * 15) 
                print('episode: {}'.format(episode))
                print('reward: \t{}'.format(reward))
                print('avg reward: \t{}'.format(np.mean(rewards)))
                print 'avg reward (last 25): \t{}'.format(np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:]))
                print('action: \t{}'.format(real_actions[action]))
                print('exploration prob: {}'.format(exploration_prob))
                
                param_info = [(p.eval(), p.name) for p in qnetwork.get_params()]
                for index, (val, name) in enumerate(param_info):
                    if previous_param_0 is None and index == 0:
                        previous_param_0 = val
                    print('parameter {} value: \n{}'.format(name, val))
                    if index == 0:
                        diff = val - previous_param_0
                        print('difference from previous param {}: \n{}'.format(name, diff))

                print('features: \t{}'.format(features))
                print('next_features: \t{}'.format(next_features))

                scaled_sequence = preprocessing.scale(np.array(sequence_examples))
                scaled_sampled = preprocessing.scale(np.array(sampled_examples))
                pca = PCA()
                _ = pca.fit_transform(scaled_sequence)
                print('variance explained by first component for sequence: {}%'.format(pca. \
                    explained_variance_ratio_[0] * 100))
                _ = pca.fit_transform(scaled_sampled)
                print('variance explained by first component for sampled: {}%'.format(pca. \
                    explained_variance_ratio_[0] * 100))

                print('*' * 52)
                print('\n')

            # collect info and total reward and also reset the reward to 0 if we reach this point
            total_reward += reward
            reward = 0
        # collect stats from this game run    
        losses.append(loss)
        rewards.append(total_reward)
    
        # if we got a best reward, inform the user 
        if total_reward > best_reward:
            best_reward = total_reward
            print("best reward!: {}".format(total_reward))

        # record the weights if record_weights=True
        # must record the weights of the indiviual layers
        # only save hidden layers b/c output layer does not have weights
        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            file_utils.save_rewards(rewards)
            file_utils.save_model(qnetwork.layers[0], 'weights/hidden0_{}.pkl'.format(episode))
            file_utils.save_model(qnetwork.layers[1], 'weights/hidden1_{}.pkl'.format(episode))

        # reduce exploration policy over time
        if exploration_prob > MINIMUM_EXPLORATION_EPSILON:
            exploration_prob -= reduce_exploration_prob_amount
        
        # inform user of how the episode went and reset the game
        print('episode: {} ended with score: {}\tloss: {}'.format(episode, rewards[-1], losses[-1]))
        ale.reset_game()

    # return the list of rewards attained
    return rewards

コード例 #49

0

ファイルを表示

class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        # OpenCV expects width as first and height as second
        self.dims = (args.screen_width, args.screen_height)

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def isTerminal(self):
        return self.ale.game_over()

コード例 #50

0

ファイルを表示

ファイル: ale.py プロジェクト: yukunix/tensorforce

class ALE(Environment):
    def __init__(self,
                 rom,
                 frame_skip=1,
                 repeat_action_probability=0.0,
                 loss_of_life_termination=False,
                 loss_of_life_reward=0,
                 display_screen=False,
                 seed=np.random.RandomState()):
        """
        Initialize ALE.

        Args:
            rom: Rom filename and directory.
            frame_skip: Repeat action for n frames. Default 1.
            repeat_action_probability: Repeats last action with given probability. Default 0.
            loss_of_life_termination: Signals a terminal state on loss of life. Default False.
            loss_of_life_reward: Reward/Penalty on loss of life (negative values are a penalty). Default 0.
            display_screen: Displays the emulator screen. Default False.
            seed: Random seed
        """

        self.ale = ALEInterface()
        self.rom = rom

        self.ale.setBool(b'display_screen', display_screen)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'frame_skip', frame_skip)

        # all set commands must be done before loading the ROM
        self.ale.loadROM(rom.encode())

        # setup gamescreen object
        width, height = self.ale.getScreenDims()
        self.gamescreen = np.empty((height, width, 3), dtype=np.uint8)

        self.frame_skip = frame_skip

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_reward = loss_of_life_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def __str__(self):
        return 'ALE({})'.format(self.rom)

    def close(self):
        self.ale = None

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        # clear gamescreen
        self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        return self.current_state

    def execute(self, action):
        # convert action to ale action
        ale_action = self.action_inds[action]

        # get reward and process terminal & next state
        rew = self.ale.act(ale_action)
        if self.loss_of_life_termination or self.loss_of_life_reward != 0:
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                self.cur_lives = new_lives
                self.life_lost = True
                rew += self.loss_of_life_reward

        terminal = self.is_terminal
        state_tp1 = self.current_state
        return state_tp1, rew, terminal

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def actions(self):
        return dict(continuous=False,
                    num_actions=len(self.action_inds),
                    names=self.action_names)

    @property
    def current_state(self):
        self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        return np.copy(self.gamescreen)

    @property
    def is_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        else:
            return self.ale.game_over()

    @property
    def action_names(self):
        action_names = [
            'No-Op', 'Fire', 'Up', 'Right', 'Left', 'Down', 'Up Right',
            'Up Left', 'Down Right', 'Down Left', 'Up Fire', 'Right Fire',
            'Left Fire', 'Down Fire', 'Up Right Fire', 'Up Left Fire',
            'Down Right Fire', 'Down Left Fire'
        ]
        return np.asarray(action_names)[self.action_inds]

コード例 #51

0

ファイルを表示

ファイル: atari.py プロジェクト: kngwyu/deep_abstract_q_network

class AtariEnvironment(interfaces.Environment):
    def __init__(self,
                 atari_rom,
                 frame_skip=4,
                 noop_max=30,
                 terminate_on_end_life=False,
                 random_seed=123,
                 frame_history_length=4,
                 use_gui=False,
                 max_num_frames=500000,
                 repeat_action_probability=0.0,
                 record_screen_dir=None):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', random_seed)
        self.ale.setInt('frame_skip', 1)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setInt('max_num_frames_per_episode', max_num_frames)
        if record_screen_dir is not None:
            self.ale.setString('record_screen_dir', record_screen_dir)
        self.ale.loadROM(atari_rom)
        self.frame_skip = frame_skip
        self.repeat_action_probability = repeat_action_probability
        self.noop_max = noop_max
        self.terminate_on_end_life = terminate_on_end_life
        self.current_lives = self.ale.lives()
        self.is_terminal = False
        self.previous_action = 0
        self.num_actions = len(self.ale.getMinimalActionSet())

        w, h = self.ale.getScreenDims()
        self.screen_width = w
        self.screen_height = h
        self.zero_last_frames = [
            np.zeros((84, 84), dtype=np.uint8),
            np.zeros((84, 84), dtype=np.uint8)
        ]
        self.last_two_frames = copy.copy(self.zero_last_frames)
        self.zero_history_frames = [
            np.zeros((84, 84), dtype=np.uint8)
            for i in range(0, frame_history_length)
        ]
        self.frame_history = copy.copy(self.zero_history_frames)
        atari_actions = self.ale.getMinimalActionSet()
        self.atari_to_onehot = dict(
            list(zip(atari_actions, list(range(len(atari_actions))))))
        self.onehot_to_atari = dict(
            list(zip(list(range(len(atari_actions))), atari_actions)))
        self.screen_image = np.zeros(self.screen_height * self.screen_width,
                                     dtype=np.uint8)

        self.use_gui = use_gui
        self.original_frame = np.zeros((h, w), dtype=np.uint8)
        self.refresh_time = datetime.timedelta(milliseconds=1000 / 60)
        self.last_refresh = datetime.datetime.now()
        if (self.use_gui):
            self.gui_screen = pygame.display.set_mode((w, h))

    def getRAM(self, ram=None):
        return self.ale.getRAM(ram)

    def _get_frame(self):
        self.ale.getScreenGrayscale(self.screen_image)
        image = self.screen_image.reshape(
            [self.screen_height, self.screen_width, 1])
        self.original_frame = image
        image = cv2.resize(image, (84, 84))
        return image

    def perform_action(self, onehot_index_action):
        if self.repeat_action_probability > 0:
            if np.random.uniform() < self.repeat_action_probability:
                onehot_index_action = self.previous_action
            self.previous_action = onehot_index_action
        action = self.onehot_to_atari[onehot_index_action]
        state, action, reward, next_state, self.is_terminal = self.perform_atari_action(
            action)
        return state, onehot_index_action, reward, next_state, self.is_terminal

    def perform_atari_action(self, atari_action):
        state = self.get_current_state()
        reward = self._act(atari_action, self.frame_skip)

        if self.use_gui:
            self.refresh_gui()

        self.frame_history[:-1] = self.frame_history[1:]
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)
        next_state = self.get_current_state()

        return state, atari_action, reward, next_state, self.is_terminal

    def _act(self, ale_action, repeat):
        reward = 0
        for i in range(repeat):
            reward += self.ale.act(ale_action)
            if i >= repeat - 2:
                self.last_two_frames = [
                    self.last_two_frames[1],
                    self._get_frame()
                ]

        self.is_terminal = self.ale.game_over()

        # terminate the episode if current_lives has decreased
        lives = self.ale.lives()
        if self.current_lives != lives:
            if self.current_lives > lives and self.terminate_on_end_life:
                self.is_terminal = True
            self.current_lives = lives

        return reward

    def get_current_state(self):
        #return copy.copy(self.frame_history)
        return [x.copy() for x in self.frame_history]

    def get_actions_for_state(self, state):
        return [
            self.atari_to_onehot[a] for a in self.ale.getMinimalActionSet()
        ]

    def reset_environment(self):
        self.last_two_frames = [self.zero_history_frames[0], self._get_frame()]

        if self.terminate_on_end_life:
            if self.ale.game_over():
                self.ale.reset_game()
        else:
            self.ale.reset_game()

        self.current_lives = self.ale.lives()

        if self.noop_max > 0:
            num_noops = np.random.randint(self.noop_max + 1)
            self._act(0, num_noops)

        self.previous_action = 0
        self.frame_history = copy.copy(self.zero_history_frames)
        self.frame_history[-1] = np.max(self.last_two_frames, axis=0)

        if self.use_gui:
            self.refresh_gui()

    def is_current_state_terminal(self):
        return self.is_terminal

    def refresh_gui(self):
        current_time = datetime.datetime.now()
        if (current_time - self.last_refresh) > self.refresh_time:
            self.last_refresh = current_time

            gui_image = np.tile(
                np.transpose(self.original_frame, axes=(1, 0, 2)), [1, 1, 3])
            # gui_image = np.zeros((self.screen_width, self.screen_height, 3), dtype=np.uint8)
            # channel = np.random.randint(3)
            # gui_image[:,:,channel] = np.transpose(self.original_frame, axes=(1, 0, 2))[:,:,0]

            pygame.surfarray.blit_array(self.gui_screen, gui_image)
            pygame.display.update()

コード例 #52

0

ファイルを表示

class ALEEnvironment():
    def __init__(self, rom_file, args):

        self.ale = ALEInterface()
        self.histLen = 4

        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        #self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.mode = "train"
        self.life_lost = False
        self.initSrcreen = self.getScreen()
        self.goalSet = []
        self.goalSet.append([[70, 65], [74, 71]])  # lower right ladder 4
        self.goalSet.append([[11, 58], [15, 66]])  # lower left ladder 3
        self.goalSet.append([[7, 41], [11, 45]])  # key 5
        self.goalCenterLoc = []
        for goal in self.goalSet:
            goalCenter = [
                float(goal[0][0] + goal[1][0]) / 2,
                float(goal[0][1] + goal[1][1]) / 2
            ]
            self.goalCenterLoc.append(goalCenter)
        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.reachedGoal = [0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        histState = np.concatenate((self.getState(), self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        return histState

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0]

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test' or not self.life_lost
                or  # `reset` called in a middle of episode
                self.ale.game_over()  # all lives are lost
            ):
            self.ale.reset_game()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def beginNextLife(self):
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  #wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        currState = self.getState()
        self.histState = np.concatenate((self.histState[:, :, 1:], currState),
                                        axis=2)
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getScreenRGB(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        #resized = screen
        return resized

    def getAgentLoc(self):
        img = self.getScreenRGB()
        man = [200, 72, 72]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = man[0]
        mask[:, :, 1] = man[1]
        mask[:, :, 2] = man[2]

        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if (np.shape(indxs[0])[0] == 0):
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        if (lastGoal == -1):
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.getAgentLoc()
        dis = np.sqrt((goalCenter[0] - agentX) * (goalCenter[0] - agentX) +
                      (goalCenter[1] - agentY) * (goalCenter[1] - agentY))
        disLast = np.sqrt((lastGoalCenter[0] - agentX) *
                          (lastGoalCenter[0] - agentX) +
                          (lastGoalCenter[1] - agentY) *
                          (lastGoalCenter[1] - agentY))
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) *
                           (goalCenter[0] - lastGoalCenter[0]) +
                           (goalCenter[1] - lastGoalCenter[1]) *
                           (goalCenter[1] - lastGoalCenter[1]))
        return 0.001 * (disLast - dis) / disGoals

    # add color channel for input of network
    def getState(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return np.reshape(resized, (84, 84, 1))

    def getStackedState(self):
        return self.histState

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        return self.ale.game_over()

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        goalPosition = self.goalSet[goal]
        goalScreen = self.initSrcreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is total number of pixels of agent
        if float(count) / 30 > 0.3:
            self.reachedGoal[goal] = 1
            return True
        return False

    def goalNotReachedBefore(self, goal):
        if (self.reachedGoal[goal] == 1):
            return False
        return True

コード例 #53

0

ファイルを表示

ファイル: ALE_environment.py プロジェクト: Islandman93/reinforcepy

class ALEEnvironment(BaseEnvironment):
    """
    The :class:`MinimalGameHandler` class takes care of the interface to the ALE and tries to do nothing else. It's
    meant for advanced users who need fine control over every aspect of the process. It has many functions that are simply
    wrappers of the underlying ALE but with pythonic names/usage.

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    display_screen : boolean
        Default False. Whether or not to show the game. True takes longer to run but can be fun to watch
    step_cap: int
        Default None. Maximum number of steps to run in an episode. Breakout can sometimes not return terminal
        even when game is ended. This fixes that and will return terminal after stepping above this count
    """
    def __init__(self, rom, resize_shape=(84, 84), skip_frame=1, repeat_action_probability=0.0,
                 step_cap=None, loss_of_life_termination=False, loss_of_life_negative_reward=False,
                 grayscale=True, display_screen=False, seed=np.random.RandomState()):
        # set up emulator
        self.ale = ALEInterface()

        if display_screen:
            self.ale.setBool(b'display_screen', True)

        self.ale.setInt(b'frame_skip', skip_frame)
        self.ale.setInt(b'random_seed', seed.randint(0, 9999))
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.setBool(b'color_averaging', False)

        self.ale.loadROM(rom.encode())

        # setup gamescreen object. I think this is faster than recreating an empty each time
        width, height = self.ale.getScreenDims()
        channels = 1 if grayscale else 3
        self.grayscale = grayscale
        self.gamescreen = np.empty((height, width, 1), dtype=np.uint8)

        self.resize_shape = resize_shape
        self.skip_frame = skip_frame
        self.step_cap = step_cap
        self.curr_step_count = 0

        # setup action converter
        # ALE returns legal action indexes, convert these to just numbers
        self.action_inds = self.ale.getMinimalActionSet()

        # setup lives
        self.loss_of_life_negative_reward = loss_of_life_negative_reward
        self.cur_lives = self.ale.lives()
        self.loss_of_life_termination = loss_of_life_termination
        self.life_lost = False

    def reset(self):
        self.ale.reset_game()
        self.cur_lives = self.ale.lives()
        self.life_lost = False
        self.curr_step_count = 0

    def step(self, action):
        self.curr_step_count += 1
        ale_action = self.action_inds[action]
        return self._step(ale_action)

    def _step(self, ale_action):
        if not self.loss_of_life_termination and not self.loss_of_life_negative_reward:
            return self.ale.act(ale_action)
        else:
            rew = self.ale.act(ale_action)
            new_lives = self.ale.lives()
            if new_lives < self.cur_lives:
                # if loss of life is negative reward subtract 1 from reward
                if self.loss_of_life_negative_reward:
                    rew -= 1
                self.cur_lives = new_lives
                self.life_lost = True
            return rew

    def get_state(self):
        if self.grayscale:
            self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen)
        else:
            self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
        # if resize_shape is none then don't resize
        if self.resize_shape is not None:
            # if grayscale we remove the last dimmension (channel)
            if self.grayscale:
                processedImg = imresize(self.gamescreen[:, :, 0], self.resize_shape)
            else:
                processedImg = imresize(self.gamescreen, self.resize_shape)
        return processedImg

    def get_state_shape(self):
        return self.resize_shape

    def get_terminal(self):
        if self.loss_of_life_termination and self.life_lost:
            return True
        elif self.step_cap is not None and self.curr_step_count > self.step_cap:
            return True
        else:
            return self.ale.game_over()

    def get_num_actions(self):
        return len(self.action_inds)

コード例 #54

0

ファイルを表示

class AtariEnvironment:
    def __init__(self, args, outputDir):

        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode, I don't think I'm handling this right in terms
            # of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % (
                    self.gameNumber)
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' %
                                       (self.getEpisodeFrameNumber()))

        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        self.episodeStepNumber = 0  # environment steps vs ALE frames.  Will probably be 4*frame number

コード例 #55

0

ファイルを表示

ファイル: 0906.py プロジェクト: boluoweifenda/DQN

# initialization
np.random.seed(SEED)

ale = ALEInterface()
if SEED == None:
    ale.setInt('random_seed', 0)
else:
    ale.setInt('random_seed', SEED)
ale.setInt("frame_skip",frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.setFloat("repeat_action_probability", 0.0)
ale.loadROM(romPath)
legal_actions = ale.getMinimalActionSet()
n_actions = len(legal_actions)
opt.n_actions = n_actions

explorationRateDelta = (initialExplorationRate - finalExplorationRate)/(finalExplorationFrame-startLearningFrame)
explorationRate = initialExplorationRate + startLearningFrame*explorationRateDelta

if networkType == "CNN":
    width = 84
    height = 84
    Dim = [height,width]
else:
    width = 36
    height = 41
    Dim = [height*width]
    n_senses = width*height

コード例 #56

0

ファイルを表示

class UpdatedAtariEnv(AtariEnv):
    def __init__(self,
                 rom_path,
                 obs_type,
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 mode=0,
                 difficulty=0):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, rom_path, obs_type)
        assert obs_type in ('ram', 'image')

        self.rom_path = rom_path
        if not os.path.exists(self.rom_path):
            raise IOError('You asked for ROM %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        # Load new ALE interface, instead of atari-py
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self.seed()

        # Set mode and difficulty
        self.ale.setMode(mode)
        self.ale.setDifficulty(difficulty)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, shape=(128, ))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        # Load game from ROM instead of game path
        self.ale.loadROM(self.rom_path)
        return [seed1, seed2]

    def _get_image(self):
        return self.ale.getScreenRGB()

コード例 #57

0

ファイルを表示

ファイル: atari_emulator.py プロジェクト: BenJamesbabala/deep_rl_ale

class AtariEmulator:

	def __init__(self, args):
		''' Initialize Atari environment '''

		# Parameters
		self.buffer_length = args.buffer_length
		self.screen_dims = args.screen_dims
		self.frame_skip = args.frame_skip
		self.blend_method = args.blend_method
		self.reward_processing = args.reward_processing
		self.max_start_wait = args.max_start_wait
		self.history_length = args.history_length
		self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip)

		#Initialize ALE instance
		self.ale = ALEInterface()
		self.ale.setFloat(b'repeat_action_probability', 0.0)
		if args.watch:
			self.ale.setBool(b'sound', True)
			self.ale.setBool(b'display_screen', True)
		self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin'))

		self.buffer = np.empty((self.buffer_length, 210, 160))
		self.current = 0
		self.action_set = self.ale.getMinimalActionSet()
		self.lives = self.ale.lives()

		self.reset()


	def get_possible_actions(self):
		''' Return list of possible actions for game '''
		return self.action_set

	def get_screen(self):
		''' Add screen to frame buffer '''
		self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
		self.current = (self.current + 1) % self.buffer_length


	def reset(self):
		self.ale.reset_game()
		self.lives = self.ale.lives()

		if self.max_start_wait < 0:
			print("ERROR: max start wait decreased beyond 0")
			sys.exit()
		elif self.max_start_wait <= self.start_frames_needed:
			wait = 0
		else:
			wait = random.randint(0, self.max_start_wait - self.start_frames_needed)
		for _ in range(wait):
			self.ale.act(self.action_set[0])

		# Fill frame buffer
		self.get_screen()
		for _ in range(self.buffer_length - 1):
			self.ale.act(self.action_set[0])
			self.get_screen()
		# get initial_states
		state = [(self.preprocess(), 0, 0, False)]
		for step in range(self.history_length - 1):
			state.append(self.run_step(0))

		# make sure agent hasn't died yet
		if self.isTerminal():
			print("Agent lost during start wait.  Decreasing max_start_wait by 1")
			self.max_start_wait -= 1
			return self.reset()

		return state


	def run_step(self, action):
		''' Apply action to game and return next screen and reward '''

		raw_reward = 0
		for step in range(self.frame_skip):
			raw_reward += self.ale.act(self.action_set[action])
			self.get_screen()

		reward = None
		if self.reward_processing == 'clip':
			reward = np.clip(raw_reward, -1, 1)
		else:
			reward = raw_reward

		terminal = self.isTerminal()
		self.lives = self.ale.lives()

		return (self.preprocess(), action, reward, terminal, raw_reward)



	def preprocess(self):
		''' Preprocess frame for agent '''

		img = None

		if self.blend_method == "max":
			img = np.amax(self.buffer, axis=0)

		return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR)

	def isTerminal(self):
		return (self.isGameOver() or (self.lives > self.ale.lives()))


	def isGameOver(self):
		return self.ale.game_over()

コード例 #58

0

ファイルを表示

class Emulate:
    def __init__(self,
                 rom_file,
                 display_screen=False,
                 frame_skip=4,
                 screen_height=84,
                 screen_width=84,
                 repeat_action_probability=0,
                 color_averaging=True,
                 random_seed=0,
                 record_screen_path='screen_pics',
                 record_sound_filename=None,
                 minimal_action_set=True):
        self.ale = ALEInterface()
        if display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          repeat_action_probability)
        self.ale.setBool('color_averaging', color_averaging)

        if random_seed:
            self.ale.setInt('random_seed', random_seed)

        self.ale.loadROM(rom_file)

        if minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.dims = (screen_width, screen_height)

    def numActions(self):
        return len(self.actions)

    def getActions(self):
        return self.actions

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def getScreenGray(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def getScreenColor(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def isTerminal(self):
        return self.ale.game_over()

コード例 #59

0

ファイルを表示

ファイル: environment.py プロジェクト: dbchiem/dqnet

class Environment:
	"""docstring for Environment"""

	BUFFER_LEN = 2
	EPISODE_FRAMES = 18000
	EPOCH_COUNT = 200
	EPOCH_STEPS = 250000
	EVAL_EPS = 0.001
	FRAMES_SKIP = 4
	FRAME_HEIGHT = 84
	FRAME_WIDTH = 84
	MAX_NO_OP = 30
	MAX_REWARD = 1
	
	def __init__(self, rom_name, rng, display_screen = False):
		self.api = ALEInterface()
		self.api.setInt('random_seed', rng.randint(333))
		self.api.setBool('display_screen', display_screen)
		self.api.setFloat('repeat_action_probability', 0.0)
		self.rom_name = rom_name
		self.display_screen = display_screen
		self.rng = rng
		self.repeat = Environment.FRAMES_SKIP
		self.buffer_len = Environment.BUFFER_LEN
		self.height = Environment.FRAME_HEIGHT
		self.width = Environment.FRAME_WIDTH
		self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
		self.merge_id = 0
		self.max_reward = Environment.MAX_REWARD
		self.eval_eps = Environment.EVAL_EPS
		self.log_dir = ''
		self.network_dir = ''

		self.api.loadROM('../rom/' + self.rom_name)
		self.minimal_actions = self.api.getMinimalActionSet()
		original_width, original_height = self.api.getScreenDims()
		self.merge_frame = np.zeros((self.buffer_len
								, original_height
								, original_width)
								, dtype = np.uint8)

	def get_action_count(self):
		return len(self.minimal_actions)

	def train(self, agent, store_freq, folder = None, start_epoch = 0):
		self._open_log_files(agent, folder)
		obs = np.zeros((self.height, self.width), dtype = np.uint8)
		epoch_count = Environment.EPOCH_COUNT

		for epoch in xrange(start_epoch, epoch_count):
			self.need_reset = True
			steps_left = Environment.EPOCH_STEPS

			print "\n" + "=" * 50
			print "Epoch #%d" % (epoch + 1)
			episode = 0
			train_start = time.time()
			while steps_left > 0:
				num_step, _ = self._run_episode(agent, steps_left, obs)
				steps_left -= num_step
				episode += 1
				if steps_left == 0 or episode % 10 == 0:
					print "Finished episode #%d, steps_left = %d" \
						% (episode, steps_left)
			train_end = time.time()

			valid_values = agent.get_validate_values()
			eval_values = self.evaluate(agent)
			test_end = time.time()

			train_time = train_end - train_start
			test_time = test_end - train_end
			step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
			print "\tFinished epoch #%d, episode trained = %d\n" \
				"\tValidate values = %.3f, evaluate reward = %.3f\n"\
				"\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
					% (epoch + 1, episode, valid_values, eval_values\
						, train_time, test_time, step_per_sec)

			self._update_log_files(agent, epoch + 1, episode
								, valid_values, eval_values
								, train_time, test_time
								, step_per_sec, store_freq)
			gc.collect()

	def evaluate(self, agent, episodes = 30, obs = None):
		print "\n***Start evaluating"
		if obs is None:
			obs = np.zeros((self.height, self.width), dtype = np.uint8)
		sum_reward = 0.0
		sum_step = 0.0
		for episode in xrange(episodes):
			self.need_reset = True
			step, reward = self._run_episode(agent, self.episode_steps, obs
											, self.eval_eps, evaluating = True)
			sum_reward += reward
			sum_step += step
			print "Finished episode %d, reward = %d, step = %d" \
					% (episode + 1, reward, step)
		self.need_reset = True
		print "Average reward per episode = %.4f" % (sum_reward / episodes)
		print "Average step per episode = %.4f" % (sum_step / episodes)
		return sum_reward / episodes

	def _prepare_game(self):
		if self.need_reset or self.api.game_over():
			self.api.reset_game()
			self.need_reset = False
			if Environment.MAX_NO_OP > 0:
				num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
							+ self.buffer_len
				for _ in xrange(num_no_op):
					self.api.act(0)

		for _ in xrange(self.buffer_len):
			self._update_buffer()

	def _run_episode(self, agent, steps_left, obs
					, eps = 0.0, evaluating = False):
		self._prepare_game()

		start_lives = self.api.lives()
		step_count = 0
		sum_reward = 0
		is_terminal = False
		while step_count < steps_left and not is_terminal:
			self._get_screen(obs)
			action_id, _ = agent.get_action(obs, eps, evaluating)
			
			reward = self._repeat_action(self.minimal_actions[action_id])
			reward_clip = reward
			if self.max_reward > 0:
				reward_clip = np.clip(reward, -self.max_reward, self.max_reward)

			life_lost = not evaluating and self.api.lives() < start_lives
			is_terminal = self.api.game_over() or life_lost \
						or step_count + 1 >= steps_left

			agent.add_experience(obs, is_terminal, action_id, reward_clip
								, evaluating)
			sum_reward += reward
			step_count += 1
			
		return step_count, sum_reward

	def _update_buffer(self):
		self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
		self.merge_id = (self.merge_id + 1) % self.buffer_len

	def _repeat_action(self, action):
		reward = 0
		for i in xrange(self.repeat):
			reward += self.api.act(action)
			if i + self.buffer_len >= self.repeat:
				self._update_buffer()
		return reward

	def _get_screen(self, resized_frame):
		self._resize_frame(self.merge_frame.max(axis = 0), resized_frame)
				
	def _resize_frame(self, src_frame, dst_frame):
		cv2.resize(src = src_frame, dst = dst_frame,
					dsize = (self.width, self.height),
					interpolation = cv2.INTER_LINEAR)

	def _open_log_files(self, agent, folder):
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]


		if folder is not None:
			self.log_dir = folder
			self.network_dir = self.log_dir + '/network'
		else:
			self.log_dir = '../run_results/' + base_rom_name + time_str
			self.network_dir = self.log_dir + '/network'

		info_name = get_next_name(self.log_dir, 'info', 'txt')
		git_name = get_next_name(self.log_dir, 'git-diff', '')

		try:
			os.stat(self.log_dir)
		except OSError:
			os.makedirs(self.log_dir)

		try:
			os.stat(self.network_dir)
		except OSError:
			os.makedirs(self.network_dir)

		with open(os.path.join(self.log_dir, info_name), 'w') as f:
			f.write('Commit: ' + subprocess.check_output(['git', 'rev-parse'
														, 'HEAD']))
			f.write('Run command: ')
			f.write(' '.join(pipes.quote(x) for x in sys.argv))
			f.write('\n\n')
			f.write(agent.get_info())
			write_info(f, Environment)
			write_info(f, agent.__class__)
			write_info(f, agent.network.__class__)

		# From https://github.com/spragunr/deep_q_rl/pull/49/files
		with open(os.path.join(self.log_dir, git_name), 'w') as f:
			f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

		if folder is not None:
			return

		with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
			f.write("epoch,episode_train,validate_values,evaluate_reward"\
				",train_time,test_time,steps_per_second\n")

		mem = psutil.virtual_memory()
		with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
			f.write("epoch,available,free,buffers,cached"\
					",available_readable,used_percent\n")
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(0, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

	def _update_log_files(self, agent, epoch, episode, valid_values
						, eval_values, train_time, test_time, step_per_sec
						, store_freq):
		print "Updating log files"
		with open(self.log_dir + '/results.csv', 'a') as f:
			f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" % \
						(epoch, episode, valid_values, eval_values
						, train_time, test_time, step_per_sec))

		mem = psutil.virtual_memory()
		with open(self.log_dir + '/memory.csv', 'a') as f:
			f.write("%d,%d,%d,%d,%d,%s,%.1f\n" % \
					(epoch, mem.available, mem.free, mem.buffers, mem.cached
					, bytes2human(mem.available), mem.percent))

		agent.dump_network(self.network_dir + ('/%03d' % (epoch)) + '.npz')

		if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
				(store_freq > 0 and (epoch % store_freq == 0)):
			agent.dump_exp(self.network_dir + '/exp.npz')

	def _setup_record(self, network_file):
		file_name, _ = os.path.splitext(os.path.basename(network_file))
		time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
		img_dir = os.path.dirname(network_file) + '/images_' \
					+ file_name + time_str
		rom_name, _ = os.path.splitext(self.rom_name)
		out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
					+ file_name + time_str + '.mov'
		print out_name

		try:
			os.stat(img_dir)
		except OSError:
			os.makedirs(img_dir)

		self.api.setString('record_screen_dir', img_dir)
		self.api.loadROM('../rom/' + self.rom_name)
		return img_dir, out_name

	def record_run(self, agent, network_file, episode_id = 1):
		if episode_id > 1:
			self.evaluate(agent, episode_id - 1)
			system_state = self.api.cloneSystemState()

		img_dir, out_name = self._setup_record(network_file)

		if episode_id > 1:
			self.api.restoreSystemState(system_state)

		self.evaluate(agent, 1)
		script = \
				"""
					{
						ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					} || {
						avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
					}
				""" % (img_dir, out_name, img_dir, out_name)
		os.system(script)

コード例 #60

0

ファイルを表示

ファイル: ale_env.py プロジェクト: only4hj/DeepRL

class AleEnv():
    def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat):
        self.actions = None
        self.rom = rom
        self.display_screen = display_screen
        self.use_env_frame_skip = use_env_frame_skip
        self.frame_repeat = frame_repeat
        
    def initialize(self):
        self.ale = ALEInterface()
        self.ale.setInt("random_seed", random.randint(1, 1000))
        if self.display_screen:
            self.ale.setBool('display_screen', True)

        if self.use_env_frame_skip == True:
            self.ale.setInt('frame_skip', self.frame_repeat)
            self.ale.setBool('color_averaging', True)        
 
        self.ale.setFloat('repeat_action_probability', 0)
        self.ale.loadROM(self.rom)
        self.actions = self.ale.getMinimalActionSet()
        print 'actions: %s' % self.actions
        (self.screen_width,self.screen_height) = self.ale.getScreenDims()
        print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height))
        
        self.initialized = True
        
    def get_actions(self, rom=None):
        if self.actions is None and rom != None:
            ale = ALEInterface()
            ale.loadROM(rom)
            self.actions = ale.getMinimalActionSet()
        return self.actions
        
    @property
    def state_dtype(self):
        return np.uint8
        
    @property
    def continuous_action(self):
        return False
    
    def reset_game(self):
        self.ale.reset_game()
        
    def lives(self):
        return self.ale.lives()
    
    def getScreenRGB(self):
        return self.ale.getScreenRGB()
    
    def getState(self, debug_display=False, debug_input=None):
        screen = self.ale.getScreenGrayscale()
        if screen is not None and debug_display:
            debug_input.show(screen.reshape(screen.shape[0], screen.shape[1]))
        return screen.reshape(self.screen_height, self.screen_width)
    
    def act(self, action):
        return self.ale.act(action)
    
    def game_over(self):
        return self.ale.game_over()
    
    def finish(self):
        return