Example #1
    def __init__(self, args, process_ind=0, num_envs_per_process=1):
        super(AtariEnv, self).__init__(args, process_ind, num_envs_per_process)

        # env_params for this env
        assert self.num_envs_per_process == 1
        self.seed = self.seed + self.process_ind * self.num_envs_per_actor  # NOTE: check again

        # setup ale
        self.ale = atari_py.ALEInterface()
        self.ale.setInt('random_seed', self.seed)
        self.ale.setInt('max_num_frames', self.early_stop)
        self.ale.setFloat('repeat_action_probability',
                          0)  # Disable sticky actions
        self.ale.setInt('frame_skip', 0)
        self.ale.setBool('color_averaging', False)
        print(atari_py.get_game_path(self.game))
        self.ale.loadROM(atari_py.get_game_path(
            self.game))  # ROM loading must be done after setting options
        actions = self.ale.getMinimalActionSet()
        self.actions = dict([i, e]
                            for i, e in zip(range(len(actions)), actions))

        self.lives = 0  # life counter (used in DeepMind training)
        self.just_died = False  # when lost one life, but game is still not over

        # setup
        self.exp_state1 = deque(maxlen=self.state_cha)
        self._reset_experience()
Example #2
 def __init__(
     self,
     game,
     seed,
     device,
     training=True,
     clip_rewards_val=1,
     history_length=4,
     sticky_action_p=0,
     max_episode_length=108e3,
 ):
     # pylint: enable=bad-continuation
     self.game_name = game
     self.device = device
     self.ale = atari_py.ALEInterface()
     self.ale.setInt("random_seed", seed)
     self.ale.setInt("max_num_frames_per_episode", max_episode_length)
     self.ale.setFloat("repeat_action_probability",
                       sticky_action_p)  # Disable sticky actions
     self.ale.setInt("frame_skip", 0)
     self.ale.setBool("color_averaging", False)
     self.ale.loadROM(atari_py.get_game_path(
         game))  # ROM loading must be done after setting options
     actions = self.ale.getMinimalActionSet()
     self.actions = dict([i, e]
                         for i, e in zip(range(len(actions)), actions))
     self.action_space = Discrete(len(self.actions))
     self.lives = 0  # Life counter (used in DeepMind training)
     # Used to check if resetting only from loss of life
     self.life_termination = False
     self.window = history_length  # Number of frames to concatenate
     self.state_buffer = deque([], maxlen=history_length)
     self.training = training  # Consistent with model training mode
     self.clip_val = clip_rewards_val
     self.sticky_action_p = sticky_action_p
Example #3
    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 4), dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width,screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
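The docstring above describes frameskip as either an int or a (low, high) tuple whose upper bound is excluded. Below is a minimal sketch of how such a value is typically resolved into a concrete number of emulator frames per agent step; the helper name and the uniform sampling are illustrative assumptions, not code from the repository quoted above.

import numpy as np

def resolve_frameskip(frameskip, rng=np.random):
    """Turn a frameskip spec into a concrete number of ALE frames."""
    if isinstance(frameskip, int):
        return frameskip           # fixed skip
    low, high = frameskip
    return rng.randint(low, high)  # tuple: sample uniformly, high excluded

# e.g. resolve_frameskip((2, 5)) repeats each agent action for 2, 3 or 4 frames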
Example #4
  def __init__(self, args):
    self.device = args.device
    self.ale = atari_py.ALEInterface()
    self.ale.setInt('random_seed', args.seed)
    self.ale.setInt('max_num_frames_per_episode', args.max_episode_length)
    self.ale.setFloat('repeat_action_probability', 0)
    self.ale.setInt('frame_skip', 0)
    self.ale.setBool('color_averaging', False)
    self.ale.loadROM(atari_py.get_game_path(args.game))
    actions = self.ale.getMinimalActionSet()
    self.actions = dict([i, e] for i, e in zip(range(len(actions)), actions))
    self.lives = 0
    self.life_termination = False
    self.window = args.history_length
    self.state_buffer = deque([], maxlen=args.history_length)
    self.training = True
    self.encode_trans = args.encode_transitions

    self.useVLAE = args.use_encoder
    self.dataset = AtariDataset(transition=self.encode_trans)
    self.network = VLadder(self.dataset, file_path='vlae/models/', name=args.name, add_coords=True)
    if self.useVLAE == 0 or self.useVLAE == 2:
       self.xdim = 84
       self.ydim = 84
    elif self.useVLAE == 1:
       self.xdim = 1
       self.ydim = 84
Example #5
    def __init__(self, mask_num=0 ,game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()

        self._action_set = self.ale.getMinimalActionSet()

        self.mask_num = mask_num
Example #6
 def __init__(self, args, training=True):
     self.device = args.device
     self.ale = atari_py.ALEInterface()
     seed = args.seed if training else args.seed + 1  # different seeds for training and evaluation
     self.ale.setInt('random_seed', seed)
     if training:
         self.ale.setInt('max_num_frames_per_episode',
                         args.max_episode_length)
     else:
         self.ale.setInt('max_num_frames_per_episode', int(108e3))  # int() since 108e3 is a float
     self.ale.setFloat('repeat_action_probability',
                       0)  # Disable sticky actions
     self.ale.setInt('frame_skip', 0)
     self.ale.setBool('color_averaging', False)
     self.ale.loadROM(atari_py.get_game_path(
         args.game))  # ROM loading must be done after setting options
     self.game = args.game
     actions = self.ale.getMinimalActionSet()
     self.actions = dict([i, e]
                         for i, e in zip(range(len(actions)), actions))
     self.lives = 0  # Life counter (used in DeepMind training)
     self.life_termination = False  # Used to check if resetting only from loss of life
     self.window = args.history_length  # Number of frames to concatenate
     self.state_buffer = deque([], maxlen=args.history_length)
     self.training = training  # Consistent with model training mode
Example #7
    def __init__(self, args):
        self.device = args.device
        self.ale = atari_py.ALEInterface()
        self.ale.setInt("random_seed", args.seed)
        self.ale.setInt("max_num_frames_per_episode", args.max_episode_length)
        self.ale.setFloat("repeat_action_probability",
                          args.proba_sticky_actions)
        self.ale.setInt("frame_skip", 0)
        self.ale.setBool("color_averaging", False)
        # ROM loading must be done after setting options
        self.ale.loadROM(atari_py.get_game_path(args.game))
        actions = self.ale.getLegalActionSet(
        )  # We always use 18 actions. See revisiting ALE.
        self.actions = {i: e for i, e in zip(range(len(actions)), actions)}
        self.window = args.history_length  # Number of frames to concatenate
        self.state_buffer = deque([], maxlen=args.history_length)
        self.action_repeat = args.action_repeat

        self.SABER_mode = not args.disable_SABER_mode
        if self.SABER_mode:
            # We need to divide time by action repeat to get the max_step_stuck
            self.max_step_stuck_SABER = int(args.max_frame_stuck_SABER /
                                            self.action_repeat)

        # Rewards in defender were really weird at the time this code was written (7 August 2019):
        # all rewards are multiplied by 100 and there is always an initial
        # reward of 10, both for no apparent reason.
        if args.game == "defender":
            self.handle_bug_in_defender = True
        else:
            self.handle_bug_in_defender = False
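The comment above converts a frame budget into agent steps by dividing by the action repeat. A small worked example with hypothetical numbers (args.max_frame_stuck_SABER is not shown in this snippet, so the 18000-frame figure is purely illustrative):

max_frame_stuck_SABER = 18000   # illustrative: 5 minutes of emulator time at 60 Hz
action_repeat = 4               # each agent action lasts 4 emulator frames
max_step_stuck_SABER = int(max_frame_stuck_SABER / action_repeat)
assert max_step_stuck_SABER == 4500  # agent steps before the episode counts as stuck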
Example #8
 def __init__(self,
              game_name,
              seed,
              max_episode_length=1e10,
              history_length=4,
              reward_clip=1,
              device='cpu'):
     self.device = device
     self.ale = atari_py.ALEInterface()
     self.ale.setInt('random_seed', seed)
     self.ale.setInt('max_num_frames_per_episode', int(max_episode_length))  # int() since the default is the float 1e10
     self.ale.setFloat('repeat_action_probability',
                       0)  # Disable sticky actions
     self.ale.setInt('frame_skip', 0)
     self.ale.setBool('color_averaging', False)
     self.ale.loadROM(atari_py.get_game_path(
         game_name))  # ROM loading must be done after setting options
     actions = self.ale.getMinimalActionSet()
     self.actions = dict(zip(range(len(actions)), actions))
     self.reward_clip = reward_clip
     self.lives = 0  # Life counter (used in DeepMind training)
     self.life_termination = False  # Used to check if resetting only from loss of life
     self.window = history_length  # Number of frames to concatenate
     self.state_buffer = deque([], maxlen=history_length)
     self.training = True  # Consistent with model training mode
     self.viewer = None
Example #9
    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

        self.seed()

        (screen_width, screen_height) = self.ale.getScreenDims()

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width,screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
Example #10
def get_num_actions(rom_path, rom_name):
    # Load the ROM and return the size of its minimal action set,
    # as the function name implies
    game_path = atari_py.get_game_path(rom_name)
    ale = atari_py.ALEInterface()
    ale.loadROM(game_path)
    return len(ale.getMinimalActionSet())
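A possible call to this helper, assuming the bundled pong ROM is installed; rom_path is unused by the body, so any placeholder works:

num_actions = get_num_actions(rom_path=None, rom_name='pong')
print(num_actions)  # pong's minimal action set typically has 6 actions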
Example #11
 def __init__(self, args):
     self.ale = atari_py.ALEInterface()
     self.ale.setInt('random_seed', args.seed)
     self.ale.setInt('max_num_frames_per_episode', args.max_episode_length)
     self.ale.setFloat('repeat_action_probability',
                       0)  # Disable sticky actions
     self.ale.setInt('frame_skip', 0)
     self.ale.setBool('color_averaging', False)
     self.ale.loadROM(atari_py.get_game_path(
         args.game))  # ROM loading must be done after setting options
     actions = self.ale.getMinimalActionSet()
     self.actions = dict([i, e]
                         for i, e in zip(range(len(actions)), actions))
     self.lives = 0  # Life counter (used in DeepMind training)
     self.life_termination = False  # Used to check if resetting only from loss of life
     self.window = args.framestack  # Number of frames to concatenate
     self.state_buffer = deque([], maxlen=args.framestack)
     self.training = True  # Consistent with model training mode
     self.grayscale = args.grayscale
     channels = 1 if self.grayscale else 3
     self.observation_space = gym.spaces.Box(low=0,
                                             high=255,
                                             dtype=np.uint8,
                                             shape=(args.framestack,
                                                    channels, 96, 96))
     self.action_space = gym.spaces.Discrete(len(self.actions))
Example #12
 def __init__(self,
              name,
              seed,
              device,
              max_episode_length,
              frame_stack,
              dual_states,
              noops=True):
     self.device = device
     self.ale = atari_py.ALEInterface()
     self.ale.setInt('random_seed', seed)
     self.ale.setInt('max_num_frames_per_episode', int(108e3))  # int() since 108e3 is a float
     self.ale.setFloat('repeat_action_probability',
                       0)  # Disable sticky actions
     self.ale.setInt('frame_skip', 0)
     self.ale.setBool('color_averaging', False)
     self.ale.loadROM(atari_py.get_game_path(
         name))  # ROM loading must be done after setting options
     actions = self.ale.getMinimalActionSet()
     self.actions = dict([i, e]
                         for i, e in zip(range(len(actions)), actions))
     self.lives = 0  # Life counter (used in DeepMind training)
     self.life_termination = False  # Used to check if resetting only from loss of life
     self.window = frame_stack  # Number of frames to concatenate
     self.state_buffer = deque([], maxlen=frame_stack)
     self.training = True  # Consistent with model training mode
     self.dual_states = dual_states  # Whether to return unedited state as well
     self.steps = 0
     self.max_steps = max_episode_length
     self.noops = noops
Example #13
    def __init__(self, game='pong', obs_type='ram'):
        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image', 'gray')
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, game_path))
        self.ale = atari_py.ALEInterface()
        self.ale.loadROM(game_path)
        self._obs_type = obs_type
        self._action_set = self.ale.getMinimalActionSet()
        self.viewer = None

        (screen_width,screen_height) = self.ale.getScreenDims()

        self.action_space = spaces.Discrete(len(self._action_set))
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
            self._screen = np.zeros((screen_height, screen_width, 4), dtype=np.uint8)
        elif self._obs_type == 'gray':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 1))
            self._screen = np.zeros((screen_height, screen_width, 1), dtype=np.uint8)

        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
Example #14
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episode = False):
        
        self.ale = atari_py.ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 4)
        #self.ale.setBool("color_averaging", False)
        self.ale.loadROM(atari_py.get_game_path(rom_name))
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.single_life_episode = single_life_episode
        self.initial_lives = self.ale.lives()
        
        # Processed frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.processed_imgs = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES), dtype=np.uint8) 

        self.screen_width,self.screen_height = self.ale.getScreenDims()
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 4), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)
        
        self.visualize = visualize
        self.visualize_processed = False
        rendering_imported = False
Example #15
def create_atari_environment(game_name=None, sticky_actions=True, frame_stack=4, screen_size=84, seed=0):
    """Wraps an Atari 2600 Gym environment with some basic preprocessing.

    This preprocessing matches the guidelines proposed in Machado et al. (2017),
    "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open
    Problems for General Agents".

    The created environment is the Gym wrapper around the Arcade Learning
    Environment.

    The main choice available to the user is whether to use sticky actions or not.
    Sticky actions, as prescribed by Machado et al., cause actions to persist
    with some probability (0.25) when a new command is sent to the ALE. This
    can be viewed as introducing a mild form of stochasticity in the environment.
    We use them by default.

    Args:
      game_name: str, the name of the Atari 2600 domain.
      sticky_actions: bool, whether to use sticky_actions as per Machado et al.
      frame_stack: int, number of consecutive frames stacked into each observation.
      screen_size: int, size to which frames are resized.
      seed: int, random seed passed to the ALE.

    Returns:
      An Atari 2600 environment with some standard preprocessing.
    """
    assert game_name is not None
    game_path = atari_py.get_game_path(game_name)
    if not os.path.exists(game_path):
        raise IOError("You asked for game {} but path {} does not "
                      " exist".format(game_name, game_path))
    ale = atari_py.ALEInterface()
    ale.setInt(b'random_seed', seed)
    repeat_action_prob = 0.25 if sticky_actions else 0
    ale.setFloat(b'repeat_action_probability', repeat_action_prob)
    ale.loadROM(game_path)
    env = AtariPreprocessing(ale, frame_stack=frame_stack, screen_size=screen_size)
    return env
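A possible invocation of the factory above, assuming AtariPreprocessing is importable as in the original module; both the sticky and deterministic settings described in the docstring are shown:

# Machado et al. evaluation protocol: sticky actions enabled (p = 0.25)
train_env = create_atari_environment('breakout', sticky_actions=True, seed=123)

# Fully deterministic ALE: sticky actions disabled (p = 0)
eval_env = create_atari_environment('breakout', sticky_actions=False, seed=123)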
Example #16
    def __init__(self, game='pong', obs_type='ram'):
        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        self._seed()

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128),
                                                high=np.zeros(128) + 255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))
Example #17
    def __init__(self, game, seed=None, use_sdl=False, n_last_screens=4,
                 frame_skip=4, treat_life_lost_as_terminal=True,
                 crop_or_scale='scale', max_start_nullops=30,
                 record_screen_dir=None):
        assert crop_or_scale in ['crop', 'scale']
        assert frame_skip >= 1
        self.n_last_screens = n_last_screens
        self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
        self.crop_or_scale = crop_or_scale
        self.max_start_nullops = max_start_nullops

        # atari_py is used only to provide rom files. atari_py has its own
        # ale_python_interface, but it is obsolete.
        if not atari_py_available:
            raise RuntimeError(
                'You need to install atari_py>=0.1.1 to use ALE.')
        game_path = atari_py.get_game_path(game)

        ale = atari_py.ALEInterface()
        if seed is not None:
            assert seed >= 0 and seed < 2 ** 31, \
                "ALE's random seed must be in [0, 2 ** 31)."
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 31)
        ale.setInt(b'random_seed', seed)
        ale.setFloat(b'repeat_action_probability', 0.0)
        ale.setBool(b'color_averaging', False)
        if record_screen_dir is not None:
            ale.setString(b'record_screen_dir',
                          str.encode(str(record_screen_dir)))
        self.frame_skip = frame_skip
        if use_sdl:
            if 'DISPLAY' not in os.environ:
                raise RuntimeError(
                    'Please set DISPLAY environment variable for use_sdl=True')
            # SDL settings below are from the ALE python example
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool(b'sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                ale.setBool(b'sound', True)
            ale.setBool(b'display_screen', True)

        ale.loadROM(str.encode(str(game_path)))

        assert ale.getFrameNumber() == 0

        self.ale = ale
        self.legal_actions = ale.getMinimalActionSet()
        self.initialize()

        self.action_space = spaces.Discrete(len(self.legal_actions))
        one_screen_observation_space = spaces.Box(
            low=0, high=255,
            shape=(84, 84), dtype=np.uint8,
        )
        self.observation_space = spaces.Tuple(
            [one_screen_observation_space] * n_last_screens)
Example #18
    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5)):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        self._seed()

        (screen_width, screen_height) = self.ale.getScreenDims()
        self._buffer = np.empty((screen_height, screen_width, 4), dtype=np.uint8)

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width,screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
Example #19
    def __init__(self,
                 game='pong',
                 mode=None,
                 difficulty=None,
                 obs_type='image',
                 stack_size=4,
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 full_action_space=True,
                 server_num=0,
                 img_dim=64,
                 **kwargs):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        #utils.EzPickle.__init__( self, game, mode, difficulty, obs_type,
        #        frameskip, repeat_action_probability)
        assert obs_type in ('ram', 'image')

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty
        self.server_num = server_num
        self.img_dim = img_dim
        self.stack_size = stack_size
        self.episode = 0

        if not os.path.exists(self.game_path):
            msg = 'You asked for game %s but path %s does not exist'
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
                "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        #self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            #self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
            pass
        elif self._obs_type == 'image':
            #self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
            pass
        else:
            raise ValueError('Unrecognized observation type: {}'.format(
                self._obs_type))
Example #20
    def __init__(self,
                 game="pong",
                 frame_skip=4,  # Frames per step (>=1).
                 num_img_obs=4,  # Number of (past) frames in observation (>=1) - "frame stacking".
                 clip_reward=True,
                 episodic_lives=True,
                 fire_on_reset=False,
                 max_start_noops=30,
                 repeat_action_probability=0.,
                 horizon=27000,
                 no_extrinsic=False,
                 no_negative_reward=False,
                 normalize_obs=False,
                 normalize_obs_steps=10000,
                 downsampling_scheme='classical',
                 record_freq=0,
                 record_dir=None
                 ):
        save__init__args(locals(), underscore=True)

        # ALE
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError("You asked for game {} but path {} does not "
                " exist".format(game, game_path))
        self.ale = atari_py.ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.ale.loadROM(game_path)

        # Spaces
        self._action_set = self.ale.getMinimalActionSet()
        self._action_space = IntBox(low=0, high=len(self._action_set))
        if downsampling_scheme == 'classical':
            self._frame_shape = (84, 84) # (W, H)
        elif downsampling_scheme == 'new':
            self._frame_shape = (80, 104)
        obs_shape = (num_img_obs, self._frame_shape[1], self._frame_shape[0])
        self._observation_space = IntBox(low=0, high=255, shape=obs_shape, dtype="uint8")
        self._max_frame = self.ale.getScreenGrayscale()
        self._raw_frame_1 = self._max_frame.copy()
        self._raw_frame_2 = self._max_frame.copy()
        self._obs = np.zeros(shape=obs_shape, dtype="uint8")

        # Settings
        self._has_fire = "FIRE" in self.get_action_meanings()
        self._has_up = "UP" in self.get_action_meanings()
        self._horizon = int(horizon)

        # Recording
        self.record_env = False # set in samping_process for environment 0
        self._record_episode = False
        self._record_freq = record_freq
        self._video_dir = os.path.join(record_dir, 'videos')
        self._frames_dir = os.path.join(self._video_dir, 'frames')
        self._episode_number = 0

        self.reset()
Example #21
 def __init__(self):
     
     self.ale = ALEInterface()
     self.ale.loadROM(get_game_path('boxing'))
     self.legal_actions = self.ale.getMinimalActionSet()
     self.policyModel = PolicyModel(self.legal_actions)
     # load saved model weights if they exist
     if os.path.exists('model'):
         self.policyModel.load_weights('./model/vpg_model')
     self.status_size_ = 4
     self.gamma_ = 1  # no discounting: the rewards are too small
Example #22
    def __init__(self, **kwargs):

        game = kwargs['game']
        frame_skip = kwargs['frame_skip']
        max_start_nullops = kwargs['max_start_nullops']
        death_ends_episode = kwargs['death_ends_episode']
        hist_len = kwargs['hist_len']
        crop_top = kwargs['crop_top']
        crop_bottom = kwargs['crop_bottom']
        seed = kwargs.get('seed', 0)
        rng = kwargs.get('rng', None)

        self.train_epoch_length = kwargs['train_epoch_length']
        self.test_epoch_length = kwargs['test_epoch_length']
        self.epoch_length = None
        self.epoch_steps = 0
        self.unwrapped = self
        self.ale = ale_python_interface.ALEInterface()
        # Implement frame skip ourselves
        self.frame_skip = frame_skip
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool('display_screen', False)
        self.ale.setFloat('repeat_action_probability', 0.)

        self.ale.loadROM(atari_py.get_game_path(game))
        self.max_start_nullops = max_start_nullops
        self.actions = self.ale.getMinimalActionSet()
        # Fire button not required for deterministic pong

        if game == 'pong':
            self.actions = [0, 3, 4]

        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=(84, 84, hist_len))
        self.action_space = gym.spaces.Discrete(len(self.actions))

        self.width, self.height = self.ale.getScreenDims()

        self.buffer_length = 2
        self.buffer_count = 0
        self.screen_buffer = np.empty(
            (self.buffer_length, self.height, self.width), dtype=np.uint8)
        self.observations = np.zeros((84, 84, hist_len), dtype=np.float32)

        self.death_ends_episode = death_ends_episode
        self.terminal_lol = False  # Most recent episode ended on a loss of life

        self.crop_top = crop_top
        self.crop_bottom = crop_bottom

        if rng is None:
            self.rng = np.random.RandomState(seed)
        else:
            self.rng = rng
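The constructor above disables ALE's built-in frame skip ("Implement frame skip ourselves") and allocates a two-slot screen_buffer, which is the usual setup for repeating actions in Python and max-pooling the last two frames to remove sprite flicker. The following is only a sketch of that pattern written against the attributes defined above; the method name _act and the exact pooling step are assumptions, not the repository's code:

    def _act(self, action):
        """Repeat `action` for frame_skip emulator frames, pooling the last two screens."""
        reward = 0
        for _ in range(self.frame_skip):
            reward += self.ale.act(action)
            # write the raw grayscale screen into the rotating two-slot buffer
            index = self.buffer_count % self.buffer_length
            self.ale.getScreenGrayscale(self.screen_buffer[index])
            self.buffer_count += 1
        # element-wise max over the two most recent frames removes flicker
        frame = np.maximum(self.screen_buffer[0], self.screen_buffer[1])
        return reward, frame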
Example #23
    def __init__(self, game='pong', obs_type='ram'):
        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        self._seed()
Example #24
    def __init__(
        self,
        game="pong",
        frame_skip=4,  # Frames per step (>=1).
        num_img_obs=4,  # Number of (past) frames in observation (>=1).
        clip_reward=True,
        episodic_lives=True,
        max_start_noops=30,
        repeat_action_probability=0.,
        horizon=27000,
        stack_actions=0,
        grayscale=True,
        imagesize=84,
        seed=42,
        id=0,
    ):
        save__init__args(locals(), underscore=True)
        # ALE
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError("You asked for game {} but path {} does not "
                          " exist".format(game, game_path))
        self.ale = atari_py.ALEInterface()
        self.seed(seed, id)
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.loadROM(game_path)

        # Spaces
        self.stack_actions = stack_actions
        self._action_set = self.ale.getMinimalActionSet()
        self._action_space = IntBox(low=0, high=len(self._action_set))
        self.channels = 1 if grayscale else 3
        self.grayscale = grayscale
        self.imagesize = imagesize
        if self.stack_actions: self.channels += 1
        obs_shape = (num_img_obs, self.channels, imagesize, imagesize)
        self._observation_space = IntBox(low=0,
                                         high=255,
                                         shape=obs_shape,
                                         dtype="uint8")
        self._max_frame = self.ale.getScreenGrayscale() if self.grayscale \
            else self.ale.getScreenRGB()
        self._raw_frame_1 = self._max_frame.copy()
        self._raw_frame_2 = self._max_frame.copy()
        self._obs = np.zeros(shape=obs_shape, dtype="uint8")

        # Settings
        self._has_fire = "FIRE" in self.get_action_meanings()
        self._has_up = "UP" in self.get_action_meanings()
        self._horizon = int(horizon)
        self.reset()
Example #25
    def __init__(self, game='pong', obs_type='ram'):
        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        self._seed()
Example #26
def test_smoke():
    pong_path = atari_py.get_game_path('pong')
    ale = atari_py.ALEInterface()
    ale.loadROM(pong_path)
    action_set = ale.getMinimalActionSet()

    # Test stepping
    ale.act(action_set[0])

    # Test screen capture
    (screen_width, screen_height) = ale.getScreenDims()
    arr = np.zeros((screen_height, screen_width, 4), dtype=np.uint8)
    ale.getScreenRGB(arr)
Example #27
    def __init__(self,
                 mode,
                 game='pong',
                 obs_type='image',
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 full_action_space=False):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""
        utils.EzPickle.__init__(self, game, obs_type, frameskip,
                                repeat_action_probability)
        assert obs_type in ('ram', 'image')
        self.game = game
        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            msg = 'You asked for game %s but path %s does not exist'
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
                "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)
        self.seed(mode)
        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))
        # variable to change whether the environment uses RGB images or grayscale
        self.image_type = 'rgb'

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))
Example #28
    def __init__(self):

        self.metadata = {'render.modes': ['human', "rgb_array"]}

        # simulate an environment that behaves like a game (pong)
        self.ale = atari_py.ALEInterface()
        self.game_path = atari_py.get_game_path('pong')

        # initializing
        self.number_of_words_to_trans = 0
        self.ACTION = [['pick-up']]
        self.phrase, self.target = None, None
        self.in_production_mood = False
        self.initial_for_reset = None
        self.done = False
Example #29
    def __init__(
        self,
        game="pong",
        frame_skip=4,
        num_img_obs=4,
        clip_reward=True,
        episodic_lives=True,
        max_start_noops=30,
        repeat_action_probability=0.,
    ):

        Serializable.quick_init(self, locals())

        # ALE
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError("You asked for game {} but path {} does not "
                          " exist".format(game, game_path))
        self.ale = atari_py.ALEInterface()
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self._repeat_action_probability = repeat_action_probability
        self.ale.loadROM(game_path)
        self._game = game

        # Spaces
        self._action_set = self.ale.getMinimalActionSet()
        self._action_space = Discrete(len(self._action_set))
        obs_shape = (num_img_obs, H, W)
        self._observation_space = UintBox(shape=obs_shape, bits=8)
        self._max_frame = self.ale.getScreenGrayscale()
        self._raw_frame_1 = self._max_frame.copy()
        self._raw_frame_2 = self._max_frame.copy()
        self._obs = np.zeros(shape=obs_shape, dtype="uint8")

        # Settings
        self._frame_skip = frame_skip
        self._num_img_obs = num_img_obs
        self._clip_reward = clip_reward
        self._max_start_noops = max_start_noops
        self._has_fire = "FIRE" in self.get_action_meanings()
        self._has_up = "UP" in self.get_action_meanings()
        self._episodic_lives = episodic_lives
        self._done = self._done_episodic_lives if episodic_lives else \
            self._done_no_epidosic_lives

        # Get ready
        self.reset()
Example #30
    def __init__(
            self,
            game="pong",  # 游戏名
            frame_skip=4,  # Frames per step (>=1).
            num_img_obs=4,  # Number of (past) frames in observation (>=1).
            clip_reward=True,
            episodic_lives=True,
            max_start_noops=30,
            repeat_action_probability=0.,
            horizon=27000,  # An arbitrary hand-picked value: even if the agent has not died, exceeding this many steps is treated as game over
    ):
        """
        environment对象会在Sampler类中的initialize()方法里构造。
        """
        save__init__args(
            locals(), underscore=True
        )  # 非常tricky的做法:把局部变量保存到实例的属性中,之后如果找不到self.xxx的定义就在这里面找
        # ALE,即电玩学习环境(Arcade Learning Environment),它提供了一个关于Atari 2600游戏的数百个游戏环境的接口
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError(
                "You asked for game {} but path {} does not exist".format(
                    game, game_path))
        self.ale = atari_py.ALEInterface()
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.loadROM(game_path)

        # Spaces: some games do not use every combination of control inputs; e.g. pong only uses two directions and the fire button, so fetch the action set used by the current game
        self._action_set = self.ale.getMinimalActionSet(
        )  # The minimal action set (a numpy array)
        self._action_space = IntBox(low=0, high=len(self._action_set))
        obs_shape = (num_img_obs, H, W)  # H presumably stands for height, W for width
        self._observation_space = IntBox(low=0,
                                         high=255,
                                         shape=obs_shape,
                                         dtype="uint8")
        self._max_frame = self.ale.getScreenGrayscale()
        self._raw_frame_1 = self._max_frame.copy()
        self._raw_frame_2 = self._max_frame.copy()
        self._obs = np.zeros(shape=obs_shape, dtype="uint8")  # 初始的observation

        # Settings
        self._has_fire = "FIRE" in self.get_action_meanings()
        self._has_up = "UP" in self.get_action_meanings()
        self._horizon = int(horizon)
        self.reset()
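The comment above flags save__init__args(locals(), underscore=True) as a tricky pattern: rlpyt's helper copies the constructor arguments onto the instance, with a leading underscore when underscore=True. A rough standalone sketch of the effect (not the helper's actual implementation):

def save_init_args_sketch(obj, local_vars, underscore=True):
    """Copy a constructor's locals onto the instance, skipping `self`."""
    prefix = "_" if underscore else ""
    for name, value in local_vars.items():
        if name != "self":
            setattr(obj, prefix + name, value)

# This is why attributes such as self._clip_reward and self._frame_skip exist
# in the class above without explicit assignments in __init__.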
Example #31
 def __init__(self, args):
     self.device = args.device
     self.ale = atari_py.ALEInterface()
     self.ale.setInt('random_seed', args.seed)
     self.ale.setInt('max_num_frames', args.max_episode_length)
     self.ale.setFloat('repeat_action_probability', 0)
     self.ale.setInt('frame_skip', 0)
     self.ale.setBool('color_averaging', False)
     self.ale.loadROM(atari_py.get_game_path(args.game))
     actions = self.ale.getMinimalActionSet()
     self.actions = dict([i, e]
                         for i, e in zip(range(len(actions)), actions))
     self.lives = 0
     self.life_termination = False
     self.window = args.history_length
     self.state_buffer = deque([], maxlen=args.history_length)
     self.training = True
Example #32
    def __init__(
            self,
            game="pong",
            frame_shape=(80, 104),
            frame_skip=4,  # Frames per step (>=1).
            num_img_obs=4,  # Number of (past) frames in observation (>=1).
            clip_reward=True,
            episodic_lives=True,
            max_start_noops=30,
            repeat_action_probability=0.,
            horizon=27000,
            seed=0):
        save__init__args(locals(), underscore=True)
        # ALE
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError("You asked for game {} but path {} does not "
                          " exist".format(game, game_path))
        self.ale = atari_py.ALEInterface()
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.loadROM(game_path)

        # Spaces
        self._action_set = self.ale.getMinimalActionSet()
        self._action_space = IntBox(low=0, high=len(self._action_set))
        self._frame_shape = frame_shape
        obs_shape = (num_img_obs, frame_shape[1], frame_shape[0])
        self._observation_space = IntBox(low=0,
                                         high=255,
                                         shape=obs_shape,
                                         dtype="uint8")
        self._max_frame = self.ale.getScreenGrayscale()
        self._raw_frame_1 = self._max_frame.copy()
        self._raw_frame_2 = self._max_frame.copy()
        self._obs = np.zeros(shape=obs_shape, dtype="uint8")

        self.random = np.random.RandomState(seed)

        # Settings
        self._has_fire = "FIRE" in self.get_action_meanings()
        self._has_up = "UP" in self.get_action_meanings()
        self._horizon = int(horizon)
        self.reset()
Example #33
    def __init__(
        self,
        game="pong",
        frame_skip=4,
        num_img_obs=4,
        clip_reward=True,
        episodic_lives=False,  # !
        max_start_noops=30,
        repeat_action_probability=0.25,  # !
        horizon=27000,
        obs_size=84,  # square resize
        fire_on_reset=True,
    ):
        save__init__args(locals(), underscore=True)
        # ALE
        game_path = atari_py.get_game_path(game)
        if not os.path.exists(game_path):
            raise IOError("You asked for game {} but path {} does not "
                          " exist".format(game, game_path))
        self.ale = atari_py.ALEInterface()
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.loadROM(game_path)

        # Spaces
        self._obs_size = obs_size
        self._action_set = self.ale.getMinimalActionSet()
        self._action_space = IntBox(low=0, high=len(self._action_set))
        obs_shape = (num_img_obs, self._obs_size, self._obs_size)
        self._observation_space = IntBox(low=0,
                                         high=256,
                                         shape=obs_shape,
                                         dtype="uint8")
        self._max_frame = self.ale.getScreenGrayscale()
        self._raw_frame_1 = self._max_frame.copy()
        self._raw_frame_2 = self._max_frame.copy()
        self._obs = np.zeros(shape=obs_shape, dtype="uint8")

        # Settings
        self._has_fire = "FIRE" in self.get_action_meanings()
        self._has_up = "UP" in self.get_action_meanings()
        self._horizon = int(horizon)
        self.reset()
Example #34
 def __init__(self, args):
   # print("In init")
   self.device = args.device
   self.ale = atari_py.ALEInterface()
   self.ale.setInt('random_seed', args.seed)
   self.ale.setInt('max_num_frames_per_episode', args.max_episode_length)
   self.ale.setFloat('repeat_action_probability', 0)  # Disable sticky actions
   self.ale.setInt('frame_skip', 0)
   self.ale.setBool('color_averaging', False)
   self.ale.loadROM(atari_py.get_game_path(args.game))  # ROM loading must be done after setting options
   actions = self.ale.getMinimalActionSet()
   self.actions = dict([i, e] for i, e in zip(range(len(actions)), actions))
   self.lives = 0  # Life counter (used in DeepMind training)
   self.life_termination = False  # Used to check if resetting only from loss of life
   self.window = args.history_length  # Number of frames to concatenate
   self.state_buffer = deque([], maxlen=args.history_length)
   self.training = True  # Consistent with model training mode
   self.device = "cuda" if torch.cuda.is_available() else "cpu"
   self.clip_model, self.preprocess = clip.load("ViT-B/32", device=self.device)
Example #35
from episodic_agent import *
from ale_experiment import *
import atari_py
game_path = atari_py.get_game_path('pong')
ale = atari_py.ALEInterface()
#ale.setFloat('repeat_action_probability',0.0)
ale.loadROM(game_path)
num_actions = len(ale.getMinimalActionSet())
agent = EpisodicControlAgent(4)
experiment = ALEExperiment(ale,agent,84,84,'scale',5000,10000,0,4,True,30,rng = np.random.RandomState(123456))
experiment.run()