def __init__(self, n=5, slip=0.2, small=2, large=10):
    """Set up an n-position chain with slippery actions and two payout sizes."""
    # Chain layout and reward structure.
    self.n = n
    self.slip = slip    # chance that an action 'slips' (is flipped)
    self.small = small  # payout for the 'backwards' action
    self.large = large  # payout for reaching the end via the 'forwards' action
    # The agent always begins at position 0 of the chain.
    self.state = 0
    # Two actions; one observation per chain position.
    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Discrete(n)
    self._seed()
def __init__(self, natural=False):
    """Blackjack-style environment: two actions, a 3-part discrete observation."""
    self.action_space = spaces.Discrete(2)
    # Observation tuple: (32, 11, 2) discrete components — presumably
    # (player sum, dealer showing card, usable-ace flag); verify against _get_obs.
    self.observation_space = spaces.Tuple((
        spaces.Discrete(32),
        spaces.Discrete(11),
        spaces.Discrete(2),
    ))
    self._seed()

    # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
    # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
    self.natural = natural

    # Start the first game
    self._reset()
def __init__(self, nS, nA, P, isd):
    """Tabular environment with nS states, nA actions, transitions P,
    and initial-state distribution isd (all stored as given).
    """
    self.nS = nS
    self.nA = nA
    self.P = P      # transition table — assumed dict-of-dicts; TODO confirm schema
    self.isd = isd  # initial state distribution
    self.lastaction = None  # remembered only for rendering

    self.action_space = spaces.Discrete(nA)
    self.observation_space = spaces.Discrete(nS)

    self._seed()
    self._reset()
def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
    """Frameskip should be either a tuple (indicating a random range to
    choose from, with the top value exclude), or an int."""
    utils.EzPickle.__init__(self, game, obs_type)
    assert obs_type in ('ram', 'image')

    self.game_path = atari_py.get_game_path(game)
    if not os.path.exists(self.game_path):
        raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

    self._seed()

    # FIX: getScreenDims() was called twice (the second call shadowed the
    # first); query once and reuse the dimensions everywhere below.
    (screen_width, screen_height) = self.ale.getScreenDims()
    # Render buffer has an alpha channel (4), while image observations are RGB (3).
    self._buffer = np.empty((screen_height, screen_width, 4), dtype=np.uint8)

    self._action_set = self.ale.getMinimalActionSet()
    self.action_space = spaces.Discrete(len(self._action_set))

    if self._obs_type == 'ram':
        # 128-byte ALE RAM, each byte in [0, 255].
        self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
    elif self._obs_type == 'image':
        self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
    else:
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def __init__(self):
    """Create the lander world; Box2D bodies are (re)built later in _reset()."""
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    # Observations are unbounded; useful range is -1..+1, but spikes can be higher.
    high = np.full(8, np.inf)
    self.observation_space = spaces.Box(-high, high)

    if self.continuous:
        # Action is two floats [main engine, left-right engines].
        # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power.
        # Engine can't work with less than 50% power.
        # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine,
        # -0.5..0.5 off.
        self.action_space = spaces.Box(-1, +1, (2,))
    else:
        # Nop, fire left engine, main engine, right engine
        self.action_space = spaces.Discrete(4)

    self._reset()
def __init__(self):
    """Classic cart-pole balancing task; sets physics constants and spaces."""
    # Physical constants.
    self.gravity = 9.8
    self.masscart = 1.0
    self.masspole = 0.1
    self.total_mass = self.masspole + self.masscart
    self.length = 0.5  # actually half the pole's length
    self.polemass_length = self.masspole * self.length
    self.force_mag = 10.0
    self.tau = 0.02  # seconds between state updates

    # Failure thresholds: pole angle beyond ~12 degrees, or cart beyond +/-2.4.
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 2.4

    # Observation bounds are set to twice the failure thresholds so that a
    # just-failed observation still lies inside the space.
    high = np.array([
        self.x_threshold * 2,
        np.finfo(np.float32).max,
        self.theta_threshold_radians * 2,
        np.finfo(np.float32).max,
    ])

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Box(-high, high)

    self._seed()
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    """Extend the base cartpole observation with a binary switch indicator."""
    super(OffSwitchCartpoleProbEnv, self).__init__()
    # Prepend a 2-valued discrete component to the inherited observation space.
    self.observation_space = spaces.Tuple(
        (spaces.Discrete(2), self.observation_space))
    self.threshold_crossed = False
    # number of episodes in which the cart crossed the left/right threshold (first).
    self.num_crosses = [0., 0.]
def __init__(self):
    """Acrobot-style setup: 6-dim continuous observation, 3 discrete actions."""
    self.viewer = None
    # First four components are bounded by 1.0 (presumably cos/sin of the two
    # joint angles — verify against the state encoding); the last two are the
    # class-level velocity limits.
    bound = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
    self.observation_space = spaces.Box(-bound, bound)
    self.action_space = spaces.Discrete(3)
    self.state = None
    self._seed()
def __init__(self):
    """Number-guessing environment: continuous guesses, discrete feedback."""
    self.range = 1000    # Randomly selected number is within +/- this value
    self.bounds = 10000  # guesses (actions) are limited to +/- this value

    self.action_space = spaces.Box(low=np.array([-self.bounds]),
                                   high=np.array([self.bounds]))
    # Four feedback signals — exact semantics defined in _step; verify there.
    self.observation_space = spaces.Discrete(4)

    self.number = 0       # the hidden target
    self.guess_count = 0  # guesses made so far this episode
    self.guess_max = 200  # guess budget per episode
    self.observation = 0

    self._seed()
    self._reset()
def __init__(self):
    """Variant of the guessing game with tighter action-space bounds."""
    self.range = 1000   # +/- value the randomly select number can be between
    self.bounds = 2000  # Action space bounds

    self.action_space = spaces.Box(low=np.array([-self.bounds]),
                                   high=np.array([self.bounds]))
    # Four feedback signals — exact semantics defined in _step; verify there.
    self.observation_space = spaces.Discrete(4)

    self.number = 0       # the hidden target
    self.guess_count = 0  # guesses made so far this episode
    self.guess_max = 200  # guess budget per episode
    self.observation = 0

    self._seed()
    self._reset()
def __init__(self, player_color, opponent, observation_type,
             illegal_move_mode, board_size):
    """
    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        opponent: An opponent policy
        observation_type: State encoding
        illegal_move_mode: What to do when the agent makes an illegal move.
            Choices: 'raise' or 'lose'
    """
    assert isinstance(board_size, int) and board_size >= 1, \
        'Invalid board size: {}'.format(board_size)
    self.board_size = board_size

    self._seed()

    # Translate the string color into pachi's constants.
    colormap = {
        'black': pachi_py.BLACK,
        'white': pachi_py.WHITE,
    }
    try:
        self.player_color = colormap[player_color]
    except KeyError:
        raise error.Error(
            "player_color must be 'black' or 'white', not {}".format(
                player_color))

    self.opponent_policy = None
    self.opponent = opponent

    assert observation_type in ['image3c']
    self.observation_type = observation_type

    assert illegal_move_mode in ['lose', 'raise']
    self.illegal_move_mode = illegal_move_mode

    # Backstop for the assert above (asserts are stripped under -O).
    if self.observation_type != 'image3c':
        raise error.Error('Unsupported observation type: {}'.format(
            self.observation_type))

    # Infer the encoded-board shape from an empty board of the right size.
    shape = pachi_py.CreateBoard(self.board_size).encode().shape
    self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
    # One action for each board position, pass, and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 2)

    # Filled in by _reset()
    self.state = None
    self.done = True
def __init__(self):
    """Mountain-car setup: position/velocity limits, goal, and spaces."""
    self.min_position = -1.2
    self.max_position = 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5

    # Observation is [position, velocity] within these bounds.
    self.low = np.array([self.min_position, -self.max_speed])
    self.high = np.array([self.max_position, self.max_speed])

    self.viewer = None

    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(self.low, self.high)

    self._seed()
    self.reset()
def __init__(self, player_color, opponent, observation_type,
             illegal_move_mode, board_size):
    """
    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        opponent: An opponent policy
        observation_type: State encoding
        illegal_move_mode: What to do when the agent makes an illegal move.
            Choices: 'raise' or 'lose'
        board_size: size of the Hex board
    """
    assert isinstance(board_size, int) and board_size >= 1, \
        'Invalid board size: {}'.format(board_size)
    self.board_size = board_size

    # Translate the string color into the class-level constants.
    colormap = {
        'black': HexEnv.BLACK,
        'white': HexEnv.WHITE,
    }
    try:
        self.player_color = colormap[player_color]
    except KeyError:
        raise error.Error(
            "player_color must be 'black' or 'white', not {}".format(
                player_color))

    self.opponent = opponent

    assert observation_type in ['numpy3c']
    self.observation_type = observation_type

    assert illegal_move_mode in ['lose', 'raise']
    self.illegal_move_mode = illegal_move_mode

    # Backstop for the assert above (asserts are stripped under -O).
    if self.observation_type != 'numpy3c':
        raise error.Error('Unsupported observation type: {}'.format(
            self.observation_type))

    # One action for each board position and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 1)

    # Reset first so the observation's shape determines the space bounds.
    observation = self.reset()
    self.observation_space = spaces.Box(np.zeros(observation.shape),
                                        np.ones(observation.shape))

    self._seed()
def __init__(self):
    """Minimal environment: a single observation and two actions."""
    self.observation_space = spaces.Discrete(1)
    self.action_space = spaces.Discrete(2)
    self._seed()
    self._reset()
def __init__(self):
    """Degenerate environment: exactly one action and one observation."""
    self.observation_space = spaces.Discrete(1)
    self.action_space = spaces.Discrete(1)
def __init__(self, spots=37):
    """Roulette-style environment with `spots` numbers plus one extra action."""
    # One action per spot plus one extra — presumably 'stop playing';
    # verify against _step.
    self.n = spots + 1
    self.action_space = spaces.Discrete(self.n)
    self.observation_space = spaces.Discrete(1)
    self._seed()