def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.): """Frameskip should be either a tuple (indicating a random range to choose from, with the top value exclude), or an int.""" utils.EzPickle.__init__(self, game, obs_type) assert obs_type in ('ram', 'image') self.game_path = atari_py.get_game_path(game) if not os.path.exists(self.game_path): raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path)) self._obs_type = obs_type self.frameskip = frameskip self.ale = atari_py.ALEInterface() self.viewer = None # Tune (or disable) ALE's action repeat: # https://github.com/openai/gym/issues/349 assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability) self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability) self._seed() (screen_width, screen_height) = self.ale.getScreenDims() self._buffer = np.empty((screen_height, screen_width, 4), dtype=np.uint8) self._action_set = self.ale.getMinimalActionSet() self.action_space = spaces.Discrete(len(self._action_set)) (screen_width,screen_height) = self.ale.getScreenDims() if self._obs_type == 'ram': self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255) elif self._obs_type == 'image': self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3)) else: raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def __init__(self): self._seed() self.viewer = None self.world = Box2D.b2World() self.moon = None self.lander = None self.particles = [] self.prev_reward = None high = np.array( [np.inf] * 8) # useful range is -1 .. +1, but spikes can be higher self.observation_space = spaces.Box(-high, high) if self.continuous: # Action is two floats [main engine, left-right engines]. # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power. # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off self.action_space = spaces.Box(-1, +1, (2, )) else: # Nop, fire left engine, main engine, right engine self.action_space = spaces.Discrete(4) self._reset()
def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = mujoco_py.MjModel(fullpath) self.data = self.model.data self.viewer = None self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.init_qpos = self.model.data.qpos.ravel().copy() self.init_qvel = self.model.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed()
def __init__(self): self.max_speed=8 self.max_torque=2. self.dt=.05 self.viewer = None high = np.array([1., 1., self.max_speed]) self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,)) self.observation_space = spaces.Box(low=-high, high=high) self._seed()
def __init__(self): self._seed() self.contactListener_keepref = FrictionDetector(self) self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref) self.viewer = None self.invisible_state_window = None self.invisible_video_window = None self.road = None self.car = None self.reward = 0.0 self.prev_reward = 0.0 self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1])) # steer, gas, brake self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
def __init__(self): self._seed() self.viewer = None self.world = Box2D.b2World() self.terrain = None self.hull = None self.prev_shaping = None self._reset() high = np.array([np.inf]*24) self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1])) self.observation_space = spaces.Box(-high, high)
def __init__(self): self.gravity = 9.8 self.masscart = 1.0 self.masspole = 0.1 self.total_mass = (self.masspole + self.masscart) self.length = 0.5 # actually half the pole's length self.polemass_length = (self.masspole * self.length) self.force_mag = 10.0 self.tau = 0.02 # seconds between state updates # Angle at which to fail the episode self.theta_threshold_radians = 12 * 2 * math.pi / 360 self.x_threshold = 2.4 # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds high = np.array([ self.x_threshold * 2, np.finfo(np.float32).max, self.theta_threshold_radians * 2, np.finfo(np.float32).max ]) self.action_space = spaces.Discrete(2) self.observation_space = spaces.Box(-high, high) self._seed() self.viewer = None self.state = None self.steps_beyond_done = None
def __init__(self): self.viewer = None high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2]) low = -high self.observation_space = spaces.Box(low, high) self.action_space = spaces.Discrete(3) self.state = None self._seed()
def __init__(self): self.min_action = -1.0 self.max_action = 1.0 self.min_position = -1.2 self.max_position = 0.6 self.max_speed = 0.07 self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version self.power = 0.0015 self.low_state = np.array([self.min_position, -self.max_speed]) self.high_state = np.array([self.max_position, self.max_speed]) self.viewer = None self.action_space = spaces.Box(self.min_action, self.max_action, shape = (1,)) self.observation_space = spaces.Box(self.low_state, self.high_state) self._seed() self.reset()
def __init__(self): self.range = 1000 # Randomly selected number is within +/- this value self.bounds = 10000 self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds])) self.observation_space = spaces.Discrete(4) self.number = 0 self.guess_count = 0 self.guess_max = 200 self.observation = 0 self._seed() self._reset()
def __init__(self): self.range = 1000 # +/- value the randomly select number can be between self.bounds = 2000 # Action space bounds self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds])) self.observation_space = spaces.Discrete(4) self.number = 0 self.guess_count = 0 self.guess_max = 200 self.observation = 0 self._seed() self._reset()
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size): """ Args: player_color: Stone color for the agent. Either 'black' or 'white' opponent: An opponent policy observation_type: State encoding illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose' """ assert isinstance( board_size, int) and board_size >= 1, 'Invalid board size: {}'.format( board_size) self.board_size = board_size self._seed() colormap = { 'black': pachi_py.BLACK, 'white': pachi_py.WHITE, } try: self.player_color = colormap[player_color] except KeyError: raise error.Error( "player_color must be 'black' or 'white', not {}".format( player_color)) self.opponent_policy = None self.opponent = opponent assert observation_type in ['image3c'] self.observation_type = observation_type assert illegal_move_mode in ['lose', 'raise'] self.illegal_move_mode = illegal_move_mode if self.observation_type != 'image3c': raise error.Error('Unsupported observation type: {}'.format( self.observation_type)) shape = pachi_py.CreateBoard(self.board_size).encode().shape self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape)) # One action for each board position, pass, and resign self.action_space = spaces.Discrete(self.board_size**2 + 2) # Filled in by _reset() self.state = None self.done = True
def __init__(self): self.min_position = -1.2 self.max_position = 0.6 self.max_speed = 0.07 self.goal_position = 0.5 self.low = np.array([self.min_position, -self.max_speed]) self.high = np.array([self.max_position, self.max_speed]) self.viewer = None self.action_space = spaces.Discrete(3) self.observation_space = spaces.Box(self.low, self.high) self._seed() self.reset()
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size): """ Args: player_color: Stone color for the agent. Either 'black' or 'white' opponent: An opponent policy observation_type: State encoding illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose' board_size: size of the Hex board """ assert isinstance( board_size, int) and board_size >= 1, 'Invalid board size: {}'.format( board_size) self.board_size = board_size colormap = { 'black': HexEnv.BLACK, 'white': HexEnv.WHITE, } try: self.player_color = colormap[player_color] except KeyError: raise error.Error( "player_color must be 'black' or 'white', not {}".format( player_color)) self.opponent = opponent assert observation_type in ['numpy3c'] self.observation_type = observation_type assert illegal_move_mode in ['lose', 'raise'] self.illegal_move_mode = illegal_move_mode if self.observation_type != 'numpy3c': raise error.Error('Unsupported observation type: {}'.format( self.observation_type)) # One action for each board position and resign self.action_space = spaces.Discrete(self.board_size**2 + 1) observation = self.reset() self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape)) self._seed()
def __init__(self, natural=False): """ Initialize environment """ # I use array of len 1 to store constants (otherwise there were some errors) self.action_space = spaces.Tuple(( spaces.Box(-5.0, 0.0, 1), # learning rate spaces.Box(-7.0, -2.0, 1), # decay spaces.Box(-5.0, 0.0, 1), # momentum spaces.Box(2, 8, 1), # batch size spaces.Box(-6.0, 1.0, 1), # l1 reg spaces.Box(-6.0, 1.0, 1), # l2 reg spaces.Box(0.0, 1.0, (5, 2)), # convolutional layer parameters spaces.Box(0.0, 1.0, (2, 2)), # fully connected layer parameters )) # observation features, in order: num of instances, num of labels, # validation accuracy after training with given parameters self.observation_space = spaces.Box(-1e5, 1e5, 2) # validation accuracy # Start the first game self._reset()
def __init__(self, natural=False): """ Initialize environment """ # I use array of len 1 to store constants (otherwise there were some errors) self.action_space = spaces.Tuple(( spaces.Box(-5.0, 0.0, 1), # learning rate spaces.Box(-7.0, -2.0, 1), # decay spaces.Box(-5.0, 0.0, 1), # momentum spaces.Box(2, 8, 1), # batch size spaces.Box(-6.0, 1.0, 1), # l1 reg spaces.Box(-6.0, 1.0, 1), # l2 reg )) # observation features, in order: num of instances, num of labels, # number of filter in part A / B of neural net, num of neurons in # output layer, validation accuracy after training with given # parameters self.observation_space = spaces.Box(-1e5, 1e5, 6) # validation accuracy # Start the first game self._reset()