def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
    """Frameskip should be either a tuple (indicating a random range to
    choose from, with the top value excluded), or an int."""
    utils.EzPickle.__init__(self, game, obs_type)
    assert obs_type in ('ram', 'image')

    self.game_path = atari_py.get_game_path(game)
    if not os.path.exists(self.game_path):
        raise IOError('You asked for game %s but path %s does not exist' % (game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(repeat_action_probability, (float, int)), \
        "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

    self._seed()

    self._action_set = self.ale.getMinimalActionSet()
    self.action_space = spaces.Discrete(len(self._action_set))

    (screen_width, screen_height) = self.ale.getScreenDims()
    if self._obs_type == 'ram':
        self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128) + 255)
    elif self._obs_type == 'image':
        self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
    else:
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
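
# Sketch of how a (low, high) frameskip tuple is typically consumed at step
# time. The env's own _step is not shown in this snippet, so treat this only
# as an illustration of the docstring's "top value excluded" semantics.
import numpy as np

def sample_frameskip(frameskip, np_random=np.random):
    """Return how many ALE frames the chosen action should be repeated for."""
    if isinstance(frameskip, int):
        return frameskip
    low, high = frameskip
    return np_random.randint(low, high)  # high is exclusive, per the docstring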
def __init__(self, model_path, frame_skip):
    if model_path.startswith("/"):
        fullpath = model_path
    else:
        fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
    if not os.path.exists(fullpath):
        raise IOError("File %s does not exist" % fullpath)
    self.frame_skip = frame_skip
    self.model = mujoco_py.load_model_from_path(fullpath)
    self.sim = mujoco_py.MjSim(self.model)
    self.data = self.sim.data
    self.viewer = None

    self.metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': int(np.round(1.0 / self.dt))
    }

    self.init_qpos = self.sim.data.qpos.ravel().copy()
    self.init_qvel = self.sim.data.qvel.ravel().copy()
    observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
    assert not done
    self.obs_dim = observation.size

    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = spaces.Box(low, high)

    high = np.inf * np.ones(self.obs_dim)
    low = -high
    self.observation_space = spaces.Box(low, high)

    self._seed()
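
# The metadata above reads self.dt before any such attribute is assigned, so
# dt is presumably a property on the class. A minimal sketch of the usual
# definition (an assumption; the property itself is not part of this snippet):
#
#     @property
#     def dt(self):
#         # simulation timestep times the number of sim steps per env step
#         return self.model.opt.timestep * self.frame_skip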
def __init__(self):
    self.max_speed = 8
    self.max_torque = 2.
    self.dt = .05
    self.viewer = None

    high = np.array([1., 1., self.max_speed])
    self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
    self.observation_space = spaces.Box(low=-high, high=high)

    self._seed()
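
# The observation bound [1., 1., max_speed] above implies the pendulum state
# is exposed as [cos(theta), sin(theta), theta_dot]. A standalone sketch of
# that encoding (the env's own observation method is not shown here):
import numpy as np

def pendulum_obs(theta, theta_dot):
    """Angle encoded as (cos, sin) so the observation has no 2*pi discontinuity."""
    return np.array([np.cos(theta), np.sin(theta), theta_dot])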
def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3,
             maxWealthAlpha=5.0, maxWealthM=200.0, maxRoundsMean=300.0,
             maxRoundsSD=25.0, reseed=True):
    # Store the hyperparameters for passing back into __init__() during resets,
    # so the same hyperparameters govern the next game's parameters, as the
    # user expects. TODO: this is boilerplate, is there any more elegant way to do this?
    self.initialWealth = float(initialWealth)
    self.edgePriorAlpha = edgePriorAlpha
    self.edgePriorBeta = edgePriorBeta
    self.maxWealthAlpha = maxWealthAlpha
    self.maxWealthM = maxWealthM
    self.maxRoundsMean = maxRoundsMean
    self.maxRoundsSD = maxRoundsSD

    # Draw this game's set of parameters:
    edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
    maxWealth = round(genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=prng.np_random))
    maxRounds = int(round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)))

    # Add an additional global variable which is the sufficient statistic for
    # the Pareto distribution on the wealth cap; alpha doesn't update, but x_m
    # does, and is simply the highest wealth count we've seen to date:
    self.maxEverWealth = float(self.initialWealth)
    # For the coinflip edge, it is total wins/losses:
    self.wins = 0
    self.losses = 0
    # For the number of rounds, we need to remember how many rounds we've played:
    self.roundsElapsed = 0

    # The rest proceeds as before:
    self.action_space = spaces.Discrete(int(maxWealth * 100))
    self.observation_space = spaces.Tuple((
        spaces.Box(0, maxWealth, shape=[1]),  # current wealth
        spaces.Discrete(maxRounds + 1),       # rounds elapsed
        spaces.Discrete(maxRounds + 1),       # wins
        spaces.Discrete(maxRounds + 1),       # losses
        spaces.Box(0, maxWealth, [1])))       # maximum observed wealth
    self.reward_range = (0, maxWealth)
    self.edge = edge
    self.wealth = self.initialWealth
    self.maxRounds = maxRounds
    self.rounds = self.maxRounds
    self.maxWealth = maxWealth
    if reseed or not hasattr(self, 'np_random'):
        self._seed()
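
# Worked check of the priors above: the Beta(7, 3) edge prior has mean
# 7 / (7 + 3) = 0.7, the wealth cap follows a generalized Pareto with shape
# 5.0 shifted to start at m=200, and the round count is Normal(300, 25). A
# standalone sketch of the same draws, using a local RandomState instead of
# gym's shared prng (an assumption of this illustration):
import numpy as np
from scipy.stats import genpareto

rng = np.random.RandomState(0)
edge = rng.beta(7, 3)                                                   # ~0.7 on average
max_wealth = round(float(genpareto.rvs(5.0, 200.0, random_state=rng)))  # loc=200
max_rounds = int(round(rng.normal(300.0, 25.0)))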
def __init__(self):
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.terrain = None
    self.hull = None

    self.prev_shaping = None
    self._reset()

    high = np.array([np.inf] * 24)
    self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([+1, +1, +1, +1]))
    self.observation_space = spaces.Box(-high, high)
def __init__(self):
    self.gravity = 9.8
    self.masscart = 1.0
    self.masspole = 0.1
    self.total_mass = (self.masspole + self.masscart)
    self.length = 0.5  # actually half the pole's length
    self.polemass_length = (self.masspole * self.length)
    self.force_mag = 10.0
    self.tau = 0.02  # seconds between state updates

    # Angle at which to fail the episode
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 2.4

    # Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
    high = np.array([
        self.x_threshold * 2,
        np.finfo(np.float32).max,
        self.theta_threshold_radians * 2,
        np.finfo(np.float32).max])

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Box(-high, high)

    self._seed()
    self.viewer = None
    self.state = None

    self.steps_beyond_done = None
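
# Sketch of the Euler update these constants feed, following the classic
# cart-pole dynamics; the env's own _step is not shown in this snippet, so
# treat this as an illustration of how tau, total_mass, polemass_length, etc.
# are typically used rather than as the definitive implementation:
import math

def cartpole_euler_step(state, force, gravity=9.8, masscart=1.0, masspole=0.1,
                        length=0.5, tau=0.02):
    x, x_dot, theta, theta_dot = state
    total_mass = masscart + masspole
    polemass_length = masspole * length
    costheta, sintheta = math.cos(theta), math.sin(theta)
    temp = (force + polemass_length * theta_dot ** 2 * sintheta) / total_mass
    thetaacc = (gravity * sintheta - costheta * temp) / (
        length * (4.0 / 3.0 - masspole * costheta ** 2 / total_mass))
    xacc = temp - polemass_length * thetaacc * costheta / total_mass
    return (x + tau * x_dot, x_dot + tau * xacc,
            theta + tau * theta_dot, theta_dot + tau * thetaacc)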
def __init__(self):
    self.viewer = None
    high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
    low = -high
    self.observation_space = spaces.Box(low, high)
    self.action_space = spaces.Discrete(3)
    self.state = None
    self._seed()
def __init__(self):
    self._seed()
    self.contactListener_keepref = FrictionDetector(self)
    self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
    self.viewer = None
    self.invisible_state_window = None
    self.invisible_video_window = None
    self.road = None
    self.car = None
    self.reward = 0.0
    self.prev_reward = 0.0

    self.action_space = spaces.Box(np.array([-1, 0, 0]), np.array([+1, +1, +1]))  # steer, gas, brake
    self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
def __init__(self):
    self.min_action = -1.0
    self.max_action = 1.0
    self.min_position = -1.2
    self.max_position = 0.6
    self.max_speed = 0.07
    self.goal_position = 0.45  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
    self.power = 0.0015

    self.low_state = np.array([self.min_position, -self.max_speed])
    self.high_state = np.array([self.max_position, self.max_speed])

    self.viewer = None

    self.action_space = spaces.Box(self.min_action, self.max_action, shape=(1,))
    self.observation_space = spaces.Box(self.low_state, self.high_state)

    self._seed()
    self.reset()
def __init__(self, env, k):
    """Stack k last frames.

    Returns lazy array, which is much more memory efficient.

    See Also
    --------
    baselines.common.atari_wrappers.LazyFrames
    """
    gym.Wrapper.__init__(self, env)
    self.k = k
    self.frames = deque([], maxlen=k)
    shp = env.observation_space.shape
    self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0] * k, shp[1], shp[2]), dtype=np.float32)
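
# Sketch of the companion methods this wrapper needs (not part of this
# snippet; the pattern follows the baselines atari wrappers the docstring
# cites, with frames stacked along the first axis to match the shp[0]*k
# observation shape above):
#
#     def _reset(self):
#         ob = self.env.reset()
#         for _ in range(self.k):
#             self.frames.append(ob)
#         return self._get_ob()
#
#     def _step(self, action):
#         ob, reward, done, info = self.env.step(action)
#         self.frames.append(ob)
#         return self._get_ob(), reward, done, info
#
#     def _get_ob(self):
#         assert len(self.frames) == self.k
#         return LazyFrames(list(self.frames))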
def __init__(self):
    self.range = 1000  # Randomly selected number is within +/- this value
    self.bounds = 10000

    self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
    self.observation_space = spaces.Discrete(4)

    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self._seed()
    self._reset()
def __init__(self):
    self.range = 1000  # Randomly selected number is within +/- this value
    self.bounds = 2000  # Action space bounds

    self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
    self.observation_space = spaces.Discrete(4)

    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self._seed()
    self._reset()
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size):
    """
    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        opponent: An opponent policy
        observation_type: State encoding
        illegal_move_mode: What to do when the agent makes an illegal move.
            Choices: 'raise' or 'lose'
        board_size: size of the Go board
    """
    assert isinstance(board_size, int) and board_size >= 1, 'Invalid board size: {}'.format(board_size)
    self.board_size = board_size

    self._seed()

    colormap = {
        'black': pachi_py.BLACK,
        'white': pachi_py.WHITE,
    }
    try:
        self.player_color = colormap[player_color]
    except KeyError:
        raise error.Error("player_color must be 'black' or 'white', not {}".format(player_color))

    self.opponent_policy = None
    self.opponent = opponent

    assert observation_type in ['image3c']
    self.observation_type = observation_type

    assert illegal_move_mode in ['lose', 'raise']
    self.illegal_move_mode = illegal_move_mode

    if self.observation_type != 'image3c':
        raise error.Error('Unsupported observation type: {}'.format(self.observation_type))

    shape = pachi_py.CreateBoard(self.board_size).encode().shape
    self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
    # One action for each board position, pass, and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 2)

    # Filled in by _reset()
    self.state = None
    self.done = True
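
# Usage sketch (hypothetical: the class name GoEnv and the 'random' opponent
# string are assumptions for illustration, not taken from this snippet):
#
#     env = GoEnv(player_color='black', opponent='random',
#                 observation_type='image3c', illegal_move_mode='lose',
#                 board_size=9)
#     # Discrete(9**2 + 2) = 83 actions: 81 board positions, pass, resign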
def __init__(self):
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    high = np.array([np.inf] * 8)  # useful range is -1 .. +1, but spikes can be higher
    self.observation_space = spaces.Box(-high, high)

    if self.continuous:
        # Action is two floats [main engine, left-right engines].
        # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
        # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
        self.action_space = spaces.Box(-1, +1, (2,))
    else:
        # Nop, fire left engine, main engine, right engine
        self.action_space = spaces.Discrete(4)

    self._reset()
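
# Usage sketch for the discrete variant (names are assumptions for
# illustration; the env is presumably driven through the usual gym API):
#
#     NOP, FIRE_LEFT, FIRE_MAIN, FIRE_RIGHT = 0, 1, 2, 3  # per the comment above
#     ob, reward, done, info = env.step(FIRE_MAIN)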
def __init__(self):
    self.min_position = -1.2
    self.max_position = 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5

    self.low = np.array([self.min_position, -self.max_speed])
    self.high = np.array([self.max_position, self.max_speed])

    self.viewer = None

    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(self.low, self.high)

    self._seed()
    self.reset()
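
# Usage sketch (assumes this __init__ belongs to a gym.Env subclass, here
# called MountainCarEnv purely for illustration): a random rollout through
# the public reset/step API of this gym era.
#
#     env = MountainCarEnv()
#     ob = env.reset()                   # [position, velocity]
#     for _ in range(200):
#         ob, reward, done, _ = env.step(env.action_space.sample())
#         if done:
#             break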
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size):
    """
    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        opponent: An opponent policy
        observation_type: State encoding
        illegal_move_mode: What to do when the agent makes an illegal move.
            Choices: 'raise' or 'lose'
        board_size: size of the Hex board
    """
    assert isinstance(board_size, int) and board_size >= 1, 'Invalid board size: {}'.format(board_size)
    self.board_size = board_size

    colormap = {
        'black': HexEnv.BLACK,
        'white': HexEnv.WHITE,
    }
    try:
        self.player_color = colormap[player_color]
    except KeyError:
        raise error.Error("player_color must be 'black' or 'white', not {}".format(player_color))

    self.opponent = opponent

    assert observation_type in ['numpy3c']
    self.observation_type = observation_type

    assert illegal_move_mode in ['lose', 'raise']
    self.illegal_move_mode = illegal_move_mode

    if self.observation_type != 'numpy3c':
        raise error.Error('Unsupported observation type: {}'.format(self.observation_type))

    # One action for each board position and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 1)
    observation = self.reset()
    self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))

    self._seed()
def __init__(self, natural=False):
    """Initialize environment."""

    # Length-1 arrays are used to store scalar parameters (plain scalars caused errors).
    self.action_space = spaces.Tuple((
        spaces.Box(-5.0, 0.0, 1),      # learning rate
        spaces.Box(-7.0, -2.0, 1),     # decay
        spaces.Box(-5.0, 0.0, 1),      # momentum
        spaces.Box(2, 8, 1),           # batch size
        spaces.Box(-6.0, 1.0, 1),      # l1 reg
        spaces.Box(-6.0, 1.0, 1),      # l2 reg
        spaces.Box(0.0, 1.0, (5, 2)),  # convolutional layer parameters
        spaces.Box(0.0, 1.0, (2, 2)),  # fully connected layer parameters
    ))

    # Observation features, in order: num of instances, num of labels,
    # validation accuracy after training with given parameters
    self.observation_space = spaces.Box(-1e5, 1e5, 2)  # validation accuracy

    # Start the first game
    self._reset()
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):
    self.action_space = spaces.Discrete(int(maxWealth * 100))  # betting in penny increments
    self.observation_space = spaces.Tuple((
        spaces.Box(0, maxWealth, [1]),    # current wealth (w)
        spaces.Discrete(maxRounds + 1)))  # rounds remaining (b)
    self.reward_range = (0, maxWealth)
    self.edge = edge
    self.wealth = initialWealth
    self.initialWealth = initialWealth
    self.maxRounds = maxRounds
    self.maxWealth = maxWealth
    self._seed()
    self._reset()
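
# Worked example of the strategy this env is named after: for an even-money
# coin flip with win probability `edge`, the Kelly criterion bets the fraction
# 2*edge - 1 of current wealth. With the defaults above (edge=0.6,
# initialWealth=25.0) that is a 0.2 fraction, i.e. a 5.00 bet, which the
# penny-increment action space encodes as action 500.
import math

def kelly_bet(wealth, edge):
    fraction = max(2 * edge - 1, 0)  # Kelly fraction for even-money odds
    return fraction * wealth

assert math.isclose(kelly_bet(25.0, 0.6), 5.0)  # -> action int(5.0 * 100) == 500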
def __init__(self, natural=False):
    """Initialize environment."""

    # Length-1 arrays are used to store scalar parameters (plain scalars caused errors).
    self.action_space = spaces.Tuple((
        spaces.Box(-5.0, 0.0, 1),   # learning rate
        spaces.Box(-7.0, -2.0, 1),  # decay
        spaces.Box(-5.0, 0.0, 1),   # momentum
        spaces.Box(2, 8, 1),        # batch size
        spaces.Box(-6.0, 1.0, 1),   # l1 reg
        spaces.Box(-6.0, 1.0, 1),   # l2 reg
    ))

    # Observation features, in order: num of instances, num of labels,
    # number of filters in part A / B of the neural net, num of neurons in
    # the output layer, validation accuracy after training with given
    # parameters
    self.observation_space = spaces.Box(-1e5, 1e5, 6)  # validation accuracy

    # Start the first game
    self._reset()
def __init__(self, env=None):
    super(ProcessFrame84, self).__init__(env)
    self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
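
# Sketch of the 84x84 preprocessing this observation space implies: convert an
# RGB Atari frame to luminance and downsample. The wrapper's own frame-
# processing method is not shown here, and the cv2-based resize is an
# assumption (it mirrors the common DQN pipeline):
import numpy as np
import cv2

def to_84x84(frame):
    """(210, 160, 3) uint8 RGB screen -> (84, 84, 1) uint8 luminance."""
    gray = np.dot(frame[..., :3], [0.299, 0.587, 0.114]).astype(np.float32)
    resized = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
    return resized.astype(np.uint8)[:, :, None]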
def __init__(self, env):
    super(CropAtari, self).__init__(env)
    self.observation_space = gym_spaces.Box(0, 255, shape=(ATARI_HEIGHT, ATARI_WIDTH, 3))