# Benchmark script: two random agents (A and B) play two-player Boxing with
# every combination of per-agent frame-skip values; results go to a CSV.
# NOTE(review): Python 2 code (`xrange`). `ale`, `legal_actions`,
# `legal_actionsB` and `randrange` must be in scope from earlier in the file.
# NOTE(review): this fragment appears truncated -- the while loop never calls
# ale.act*/actAB, so as written the emulator would not advance; confirm the
# missing tail against the original script.
result_file = open("result_16_20.csv", "w")
result_file.write(
    "# frame_skip_agent_A, frame_skip_agent_B, episode, score_A, score_B, reward\n"
)
for frame_skip_agent_A in xrange(16, 21):  # Agent A is allowed to change his action every X frames
    for frame_skip_agent_B in xrange(1, 31):  # Agent B is allowed to change his action every Y frames
        frame_counter = 0
        # Reset point: the least common multiple bound of both skips, so both
        # modulo tests below stay aligned after the counter wraps.
        frame_counter_reset = frame_skip_agent_A * frame_skip_agent_B
        # Play 300 episodes
        for episode in xrange(300):
            total_reward = score_A = score_B = 0
            ale.setMode(1)  # Switch Boxing to two player mode
            while not ale.game_over():
                # Set action for agent A
                if frame_counter % frame_skip_agent_A == 0:
                    a = legal_actions[randrange(len(legal_actions))]
                # Set action for agent B
                if frame_counter % frame_skip_agent_B == 0:
                    b = legal_actionsB[randrange(len(legal_actionsB))]
                # Reset frame counter to prevent overflow
                frame_counter += 1
                if frame_counter == frame_counter_reset:
                    frame_counter = 0
                # Apply actions and get the resulting reward
class AtariEnvironment:
    """ALE-backed environment that max-pools the most recent raw screens and
    stacks a history of post-processed frames into the agent observation.

    All tuning knobs (ROM path, mode, difficulty, frame skip, buffer sizes,
    sticky-action probability, random seed) are read from the module-level
    `cfg` object.
    """

    num_actions = 18  # Use full action set

    def __init__(self, frame_shape, frame_postprocess=lambda x: x):
        """Create the ALE instance, load the ROM, and prime both buffers.

        frame_shape: shape of one post-processed frame as kept in the
            history stack.
        frame_postprocess: callable applied to each max-pooled frame
            (e.g. resize/crop); defaults to identity.
        """
        self.ale = ALEInterface()

        self.ale.setBool(b"display_screen", cfg.display_screen)
        self.ale.setInt(b"frame_skip", 1)  # skipping is done manually in act()
        self.ale.setBool(b"color_averaging", False)
        self.ale.setInt(b"random_seed", cfg.random_seed)
        self.ale.setFloat(b"repeat_action_probability", cfg.sticky_prob)

        self.ale.loadROM(str.encode(cfg.rom))
        # Mode/difficulty are applied after the ROM is loaded.
        self.ale.setMode(cfg.mode)
        self.ale.setDifficulty(cfg.difficulty)

        self.action_set = self.ale.getLegalActionSet()
        assert len(self.action_set) == AtariEnvironment.num_actions

        # ALE reports (width, height); reversing yields (height, width).
        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(
            cfg.frame_buffer_size, screen_dims, np.uint8
        )
        self._frame_stack = CircularBuffer(
            cfg.frame_history_size, frame_shape, np.uint8
        )
        self._frame_postprocess = frame_postprocess

        self._episode_count = 0
        self.reset(inc_episode_count=False)

    def _is_terminal(self):
        # True once ALE reports game over.
        return self.ale.game_over()

    def _get_single_frame(self):
        """Max-pool the raw frame buffer into one (H, W, 1) frame.

        The pixel-wise maximum over the buffered screens removes Atari
        sprite flicker; the result is then post-processed.
        """
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        # BUG FIX: amax leaves a 2-D array, so the inserted axis must be 2;
        # the previous value 3 raises numpy.AxisError on NumPy >= 1.18.
        expanded_frame = np.expand_dims(maxed_frame, axis=2)
        frame = self._frame_postprocess(expanded_frame)
        return frame

    def reset(self, inc_episode_count=True):
        """Start a new episode and refill both frame buffers."""
        self._episode_frames = 0
        self._episode_reward = 0
        if inc_episode_count:
            self._episode_count += 1

        self.ale.reset_game()
        for _ in range(cfg.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        for _ in range(cfg.frame_history_size):
            self._frame_stack.append(self._get_single_frame())

    def act(self, action):
        """Repeat `action` for cfg.frame_skip emulator frames.

        Returns (clipped_reward, state, terminal). The unclipped reward is
        accumulated into `episode_reward`; only the returned value is
        clipped to [-1, 1].
        """
        assert not self._is_terminal()
        cum_reward = 0
        for _ in range(cfg.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())
        self._frame_stack.append(self._get_single_frame())

        self._episode_frames += cfg.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self.state, self._is_terminal()

    @property
    def state(self):
        """Current observation: the frame history concatenated on the last axis."""
        assert len(self._frame_buffer) == cfg.frame_buffer_size
        assert len(self._frame_stack) == cfg.frame_history_size
        return np.concatenate(self._frame_stack, axis=-1)

    @property
    def episode_reward(self):
        # Unclipped cumulative reward of the current episode.
        return self._episode_reward

    @property
    def episode_frames(self):
        # Emulator frames elapsed in the current episode.
        return self._episode_frames

    @property
    def episode_steps(self):
        # Agent steps = frames divided by the configured frame skip.
        return self._episode_frames // cfg.frame_skip

    @property
    def episode_count(self):
        return self._episode_count
class AleInterface(object):
    # Thin wrapper around a two-player fork of ALE (note getMinimalActionSetB
    # and actAB, which do not exist in stock ale_python_interface).
    # NOTE(review): Python 2 code (print statements).
    def __init__(self, game, args):
        # game: ROM name, resolved to ./roms/<game>.bin below.
        # args: namespace carrying frame_skip, repeat_action_probability,
        #       color_averaging and random_seed.
        #self.game = game
        self.ale = ALEInterface()
        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        # self.ale.setBool('display_screen', True)
        # self.ale.setBool('display_screen', args.display_screen)
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)
        #
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)
        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "not found rom file:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)

        # Minimal action sets for player A and player B.
        self.actions = self.ale.getMinimalActionSet()
        self.actionsB = self.ale.getMinimalActionSetB()

    def get_actions_num(self):
        # Number of minimal actions available to player A.
        return len(self.actions)

    def get_actions_numB(self):
        # Number of minimal actions available to player B.
        return len(self.actionsB)

    def act(self, action):
        # Single-player step: apply player A's action index, return the reward.
        reward = self.ale.act(self.actions[action])
        return reward

    def actAB(self, actionA, actionB):
        # Two-player step. NOTE(review): actionB is assumed to arrive offset
        # by 18 (player-B ids appear to start at 18, hence the `- 18`) --
        # confirm against the caller.
        reward = self.ale.actAB(self.actions[actionA], self.actionsB[actionB - 18])
        return reward

    def get_screen_gray(self):
        # Raw grayscale screen from ALE.
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        # Raw RGB screen from ALE.
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()

    def set_mode(self, mode):
        # Select a game mode (e.g. two-player variants).
        return self.ale.setMode(mode)
class UpdatedAtariEnv(AtariEnv):
    """gym AtariEnv variant that drives a standalone ALEInterface directly
    (instead of atari-py), adding game mode and difficulty selection."""

    def __init__(self, rom_path, obs_type, frameskip=(2, 5),
                 repeat_action_probability=0., mode=0, difficulty=0):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int.

        rom_path: filesystem path of the ROM to load.
        obs_type: 'ram' (128-byte RAM vector) or 'image' (RGB screen).
        mode, difficulty: passed to ALE after the ROM is loaded.

        Raises IOError if rom_path does not exist.
        """
        utils.EzPickle.__init__(self, rom_path, obs_type)
        assert obs_type in ('ram', 'image')

        self.rom_path = rom_path
        if not os.path.exists(self.rom_path):
            # BUG FIX: the original interpolated the undefined names `game`
            # and `self.game_path`, so a missing ROM raised NameError here
            # instead of the intended IOError.
            raise IOError('You asked for ROM %s but path %s does not exist'
                          % (os.path.basename(self.rom_path), self.rom_path))

        self._obs_type = obs_type
        self.frameskip = frameskip
        # Load new ALE interface, instead of atari-py
        self.ale = ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        # seed() also loads the ROM (seeding must precede loadROM).
        self.seed()

        # Set mode and difficulty -- valid only after the ROM is loaded.
        self.ale.setMode(mode)
        self.ale.setDifficulty(difficulty)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, shape=(128, ))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(screen_height,
                                                       screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))

    def seed(self, seed=None):
        """Seed gym's RNG and ALE, then (re)load the ROM.

        Returns [seed1, seed2] per the gym seeding convention.
        """
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        # Load game from ROM instead of game path
        self.ale.loadROM(self.rom_path)
        return [seed1, seed2]

    def _get_image(self):
        # Raw RGB screen from ALE.
        return self.ale.getScreenRGB()
class AtariEnvironment:
    """ALE-backed environment configured from FLAGS, with optional
    on-disk screen and sound recording to FLAGS.record_dir."""

    def __init__(self, seed=1, record=False):
        """Create the ALE instance, optionally enable recording, load the
        ROM, and prime the raw frame buffer.

        seed: ALE random seed.
        record: when True, also display the screen and write every frame
            plus a sound.wav into FLAGS.record_dir.
        """
        self.ale = ALEInterface()

        self.ale.setBool(b'display_screen', FLAGS.display_screen or record)
        self.ale.setInt(b'frame_skip', 1)  # skipping is done manually in act()
        self.ale.setBool(b'color_averaging', False)
        self.ale.setInt(b'random_seed', seed)
        self.ale.setFloat(b'repeat_action_probability', FLAGS.sticky_prob)
        self.ale.setInt(b'max_num_frames_per_episode',
                        FLAGS.max_num_frames_per_episode)

        if record:
            if not tf.gfile.Exists(FLAGS.record_dir):
                tf.gfile.MakeDirs(FLAGS.record_dir)
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_screen_dir',
                               str.encode(FLAGS.record_dir))
            self.ale.setString(b'record_sound_filename',
                               str.encode(FLAGS.record_dir + '/sound.wav'))
            # Audio fragment size used while recording sound.
            self.ale.setInt(b'fragsize', 64)

        self.ale.loadROM(str.encode(FLAGS.rom))
        # Mode/difficulty are applied after the ROM is loaded.
        self.ale.setMode(FLAGS.mode)
        self.ale.setDifficulty(FLAGS.difficulty)

        self.action_set = self.ale.getLegalActionSet()

        # ALE reports (width, height); reversing yields (height, width).
        screen_dims = tuple(reversed(self.ale.getScreenDims())) + (1,)
        self._frame_buffer = CircularBuffer(FLAGS.frame_buffer_size,
                                            screen_dims, np.uint8)

        self.reset()

    def _is_terminal(self):
        # True once ALE reports game over.
        return self.ale.game_over()

    def _get_single_frame(self):
        """Max-pool the raw frame buffer into one (H, W, 1) frame
        (pixel-wise maximum removes Atari sprite flicker)."""
        stacked_frames = np.concatenate(self._frame_buffer, axis=2)
        maxed_frame = np.amax(stacked_frames, axis=2)
        # BUG FIX: amax leaves a 2-D array, so the inserted axis must be 2;
        # the previous value 3 raises numpy.AxisError on NumPy >= 1.18.
        expanded_frame = np.expand_dims(maxed_frame, axis=2)
        return expanded_frame

    def reset(self):
        """Start a new episode and refill the raw frame buffer."""
        self._episode_frames = 0
        self._episode_reward = 0
        self.ale.reset_game()
        for _ in range(FLAGS.frame_buffer_size):
            self._frame_buffer.append(self.ale.getScreenGrayscale())

    def act(self, action):
        """Repeat `action` for FLAGS.frame_skip emulator frames.

        Returns (clipped_reward, frame, terminal). The unclipped reward is
        accumulated into the episode total; only the returned value is
        clipped to [-1, 1].
        """
        assert not self._is_terminal()
        cum_reward = 0
        for _ in range(FLAGS.frame_skip):
            cum_reward += self.ale.act(self.action_set[action])
            self._frame_buffer.append(self.ale.getScreenGrayscale())

        self._episode_frames += FLAGS.frame_skip
        self._episode_reward += cum_reward
        cum_reward = np.clip(cum_reward, -1, 1)

        return cum_reward, self._get_single_frame(), self._is_terminal()

    def state(self):
        # Current max-pooled frame; the buffer must be full.
        assert len(self._frame_buffer) == FLAGS.frame_buffer_size
        return self._get_single_frame()

    def num_actions(self):
        # Size of the full legal action set.
        return len(self.action_set)

    def episode_reward(self):
        # Unclipped cumulative reward of the current episode.
        return self._episode_reward

    def episode_frames(self):
        # Emulator frames elapsed in the current episode.
        return self._episode_frames

    def frame_skip(self):
        return FLAGS.frame_skip
# Load the ROM file ale.loadROM(sys.argv[1]) #Get the list of available modes and difficulties avail_modes = ale.getAvailableModes() avail_diff = ale.getAvailableDifficulties() print 'Number of available modes: ', len(avail_modes) print 'Number of available difficulties: ', len(avail_diff) # Get the list of legal actions legal_actions = ale.getLegalActionSet() # Play one episode in each mode and in each difficulty for mode in avail_modes: for diff in avail_diff: ale.setDifficulty(diff) ale.setMode(mode) ale.reset_game() print 'Mode {0} difficulty {1}:'.format(mode, diff) total_reward = 0 while not ale.game_over(): a = legal_actions[randrange(len(legal_actions))] # Apply an action and get the resulting reward reward = ale.act(a) total_reward += reward print 'Episode ended with score: ', total_reward
class ALEEnvironment(Environment):
    # ALE-backed Environment with life-loss handling: in 'train' mode a lost
    # life ends the episode without resetting the emulator, so the next
    # episode can continue from the current state.
    # NOTE(review): step() calls a two-argument ale.act(), which comes from a
    # two-player fork of ALE, not stock ale_python_interface.
    def __init__(self, rom_file, args):
        # rom_file: path to the ROM; args: namespace with ALE settings,
        # recording paths, game mode/difficulty and minimal_action_set flag.
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()

        # Set ALE configuration
        self.ale.setInt(b'frame_skip', args.frame_skip)
        self.ale.setFloat(b'repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool(b'color_averaging', args.color_averaging)

        # NOTE(review): a seed of 0 is treated the same as "no seed" here.
        if args.random_seed:
            self.ale.setInt(b'random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                os.makedirs(args.record_screen_path)
            self.ale.setString(b'record_screen_dir', args.record_screen_path.encode())

        if args.record_sound_filename:
            self.ale.setBool(b'sound', True)
            self.ale.setString(b'record_sound_filename', args.record_sound_filename.encode())

        # Load ROM
        self.ale.loadROM(rom_file.encode())

        # Set game difficulty and mode (after loading)
        self.ale.setDifficulty(args.game_difficulty)
        self.ale.setMode(args.game_mode)

        # Whether to use minimum set or set
        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        # Life lost control
        self.life_lost = False

        # Initialize base class
        super(ALEEnvironment, self).__init__(args)

    def action_dim(self):
        # Size of the selected (minimal or legal) action set.
        return len(self.actions)

    def reset(self):
        # In test mode, the game is simply initialized. In train mode, if the game
        # is in terminal state due to a life loss but not yet game over, then only
        # life loss flag is reset so that the next game starts from the current
        # state. Otherwise, the game is simply initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in a middle of episode
                or self.ale.game_over()  # all lives are lost
                ):
            self.ale.reset_game()
        self.life_lost = False
        screen = self._get_state(self.ale.getScreenRGB())
        return screen

    def step(self, action, action_b=0, ignore_screen=False):
        # Advance the emulator one step with both players' actions.
        # Returns (screen, reward, terminal); screen is None when
        # ignore_screen is set (e.g. for RobotEnvironment).
        lives = self.ale.lives()

        # Act on environment. NOTE(review): the `+ 18` presumably maps a
        # player-A action id to the corresponding player-B id (B's ids appear
        # to start at 18) -- confirm against the ALE fork in use.
        reward = self.ale.act(self.actions[action], self.actions[action_b] + 18)

        # Check if life was lost
        self.life_lost = (not lives == self.ale.lives())

        # Check terminal state: in train mode a life loss is also terminal.
        terminal = (self.ale.game_over() or self.life_lost
                    ) if self.mode == 'train' else self.ale.game_over()

        # Check if should ignore the screen (in case of RobotEnvironment)
        if ignore_screen:
            screen = None
        else:
            # Get screen from ALE
            screen = self._get_state(self.ale.getScreenRGB())
            # Wait for next frame to start
            self.fps_control.wait_next_frame()

        return screen, reward, terminal