Ejemplo n.º 1
0
    def __init__(self,
                 initial_wealth=25.0,
                 edge_prior_alpha=7,
                 edge_prior_beta=3,
                 max_wealth_alpha=5.0,
                 max_wealth_m=200.0,
                 max_rounds_mean=300.0,
                 max_rounds_sd=25.0,
                 reseed=True):
        """Set up one game whose edge, wealth cap and round limit are sampled.

        Args:
            initial_wealth: Starting wealth; stored as a float.
            edge_prior_alpha: First parameter of the beta draw for the edge.
            edge_prior_beta: Second parameter of the beta draw for the edge.
            max_wealth_alpha: First positional argument given to
                ``genpareto.rvs`` when drawing the wealth cap.
            max_wealth_m: Second positional argument given to
                ``genpareto.rvs`` when drawing the wealth cap.
            max_rounds_mean: Mean of the normal draw for the round limit.
            max_rounds_sd: Standard deviation of that normal draw.
            reseed: When true, ``self.seed()`` is always called; otherwise it
                is called only if the instance has no ``np_random`` yet.
        """
        # Keep the hyper-parameters on the instance so that a reset can feed
        # them back into __init__() and the next game is governed by the same
        # hyper-parameters, as the user expects.
        # TODO: this is boilerplate, is there any more elegant way to do this?
        self.initial_wealth = float(initial_wealth)
        self.edge_prior_alpha = edge_prior_alpha
        self.edge_prior_beta = edge_prior_beta
        self.max_wealth_alpha = max_wealth_alpha
        self.max_wealth_m = max_wealth_m
        self.max_rounds_mean = max_rounds_mean
        self.max_rounds_sd = max_rounds_sd

        needs_rng = reseed or not hasattr(self, 'np_random')
        if needs_rng:
            self.seed()

        # Sample this game's parameters. The three draws happen in a fixed
        # order (edge, wealth cap, round limit) from the same generator.
        rng = self.np_random
        sampled_edge = rng.beta(edge_prior_alpha, edge_prior_beta)
        raw_cap = genpareto.rvs(max_wealth_alpha,
                                max_wealth_m,
                                random_state=rng)
        wealth_cap = round(raw_cap)
        raw_rounds = rng.normal(max_rounds_mean, max_rounds_sd)
        round_limit = int(round(raw_rounds))

        # Sufficient statistic for the Pareto distribution on the wealth cap:
        # alpha doesn't update, but x_m does, and it is simply the highest
        # wealth seen to date.
        self.max_ever_wealth = float(self.initial_wealth)
        # Sufficient statistic for the coinflip edge: total wins and losses.
        self.wins = 0
        self.losses = 0
        # For the number of rounds we must remember how many were played.
        self.rounds_elapsed = 0

        # Spaces are built in the same order as they appear in the tuple.
        self.action_space = spaces.Discrete(int(wealth_cap * 100))
        current_wealth_space = spaces.Box(0, wealth_cap, shape=[1],
                                          dtype=np.float32)
        rounds_elapsed_space = spaces.Discrete(round_limit + 1)
        wins_space = spaces.Discrete(round_limit + 1)
        losses_space = spaces.Discrete(round_limit + 1)
        peak_wealth_space = spaces.Box(0, wealth_cap, [1], dtype=np.float32)
        self.observation_space = spaces.Tuple((
            current_wealth_space,
            rounds_elapsed_space,
            wins_space,
            losses_space,
            peak_wealth_space))

        self.reward_range = (0, wealth_cap)
        self.edge = sampled_edge
        self.wealth = self.initial_wealth
        self.max_rounds = round_limit
        self.rounds = round_limit
        self.max_wealth = wealth_cap
Ejemplo n.º 2
0
 def __init__(self, n=5, slip=0.2, small=2, large=10):
     """Configure the chain and its two-action / n-state spaces.

     Args:
         n: Size of the discrete observation space.
         slip: Probability of 'slipping' an action.
         small: Payout for the 'backwards' action.
         large: Payout at the end of the chain for the 'forwards' action.
     """
     self.n, self.slip = n, slip
     self.small, self.large = small, large
     # Every instance starts at the beginning of the chain.
     self.state = 0
     self.action_space = spaces.Discrete(2)
     self.observation_space = spaces.Discrete(n)
     self.seed()
Ejemplo n.º 3
0
    def __init__(self, nS, nA, P, isd):
        """Store the environment tables, build the spaces, seed and reset.

        Args:
            nS: Size of the discrete observation space.
            nA: Size of the discrete action space.
            P: Stored as ``self.P`` (presumably the transition table --
                confirm against the methods that read it).
            isd: Stored as ``self.isd`` (presumably the initial state
                distribution -- confirm against ``reset``).
        """
        self.P, self.isd = P, isd
        # Remembered only for rendering.
        self.lastaction = None
        self.nS, self.nA = nS, nA

        self.action_space = spaces.Discrete(nA)
        self.observation_space = spaces.Discrete(nS)

        self.seed()
        self.reset()
Ejemplo n.º 4
0
    def __init__(self, natural=False):
        """Build the spaces, seed the RNG and start the first game.

        Args:
            natural: Flag to pay out 1.5 on a "natural" blackjack win, like
                casino rules.
        """
        self.action_space = spaces.Discrete(2)
        observation_parts = (
            spaces.Discrete(32),
            spaces.Discrete(11),
            spaces.Discrete(2),
        )
        self.observation_space = spaces.Tuple(observation_parts)
        self.seed()

        # Casino-style 1.5x payout switch.
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural
        # Kick off the first game.
        self.reset()
Ejemplo n.º 5
0
    def __init__(self):
        """Create the physics world, declare the spaces and reset.

        Reads ``self.continuous`` (not set here, so it must already exist on
        the class or instance) to choose between a continuous and a discrete
        action space.
        """
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.moon = self.lander = None
        self.particles = list()

        self.prev_reward = None

        # Useful range is -1 .. +1, but spikes can be higher, hence the
        # unbounded limits.
        self.observation_space = spaces.Box(
            -np.inf, np.inf, shape=(8, ), dtype=np.float32)

        if not self.continuous:
            # Nop, fire left engine, main engine, right engine.
            self.action_space = spaces.Discrete(4)
        else:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power.
            # Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right
            # engine, -0.5..0.5 off.
            self.action_space = spaces.Box(-1, +1, (2, ), dtype=np.float32)

        self.reset()
Ejemplo n.º 6
0
    def __init__(self):
        """Set the physical constants, failure thresholds and spaces.

        The observation bounds are created as float32 so that they already
        match the dtype of the ``Box`` they are passed to.
        """
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'

        # Angle at which to fail the episode (12 degrees, in radians)
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds. The same doubling is applied to x_threshold.
        # The array is float32 from the start: handing float64 bounds to a
        # float32 Box makes newer Gym releases warn about lowered precision.
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max,
            self.theta_threshold_radians * 2,
            np.finfo(np.float32).max],
            dtype=np.float32)

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None

        self.steps_beyond_done = None
Ejemplo n.º 7
0
 def __init__(self):
     """Declare the observation/action spaces and seed the RNG.

     Reads ``self.MAX_VEL_1`` and ``self.MAX_VEL_2`` (not set here, so they
     must already exist on the class or instance) as the last two
     observation bounds.
     """
     self.viewer = None
     # Bounds are symmetric around zero. They are created as float32 so they
     # match the Box dtype; float64 bounds passed to a float32 Box make newer
     # Gym releases warn about lowered precision.
     high = np.array(
         [1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2],
         dtype=np.float32)
     low = -high
     self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
     self.action_space = spaces.Discrete(3)
     self.state = None
     self.seed()
Ejemplo n.º 8
0
    def __init__(self):
        """Seed the RNG, declare an image observation space and reset.

        Observations are uint8 arrays of shape ``(FIELD_H, FIELD_W, 3)`` with
        values in 0..255; there are three discrete actions.
        """
        self.seed()
        self.viewer = None

        frame_shape = (FIELD_H, FIELD_W, 3)
        self.observation_space = spaces.Box(
            low=0, high=255, shape=frame_shape, dtype=np.uint8)
        self.action_space = spaces.Discrete(3)

        self.reset()
Ejemplo n.º 9
0
    def __init__(self,
                 game='pong',
                 obs_type='ram',
                 frameskip=(2, 5),
                 repeat_action_probability=0.,
                 full_action_space=False):
        """Create an ALE-backed environment for ``game``.

        Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value excluded), or an int.

        Args:
            game: Name handed to ``atari_py.get_game_path``.
            obs_type: Either ``'ram'`` or ``'image'``; selects the shape of
                the observation space.
            frameskip: Stored as ``self.frameskip``; see above.
            repeat_action_probability: Float or int forwarded to ALE's
                ``repeat_action_probability`` setting.
            full_action_space: When true the legal action set is used,
                otherwise the minimal action set.

        Raises:
            AssertionError: If ``obs_type`` is not one of the two supported
                values or ``repeat_action_probability`` is not a float/int.
            IOError: If the resolved game path does not exist.
        """

        # Record every constructor argument, including full_action_space.
        # EzPickle is expected to replay these arguments into the constructor
        # when the env is unpickled; leaving the flag out would rebuild the
        # env with the default (minimal) action set.
        utils.EzPickle.__init__(self, game, obs_type, frameskip,
                                repeat_action_probability, full_action_space)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist' %
                          (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym_wmgds/issues/349
        assert isinstance(
            repeat_action_probability,
            (float, int)), "Invalid repeat_action_probability: {!r}".format(
                repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'),
                          repeat_action_probability)

        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            # RAM observations are a flat vector of 128 bytes.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                dtype=np.uint8,
                                                shape=(128, ))
        elif self._obs_type == 'image':
            # Image observations are height x width x 3 byte arrays.
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(screen_height,
                                                       screen_width, 3),
                                                dtype=np.uint8)
        else:
            # Only reachable when assertions are disabled (python -O).
            raise error.Error('Unrecognized observation type: {}'.format(
                self._obs_type))
Ejemplo n.º 10
0
    def __init__(self,
                 initial_wealth=25.0,
                 edge=0.6,
                 max_wealth=250.0,
                 max_rounds=300):
        """Configure a fixed-parameter coinflip game, then seed and reset.

        Args:
            initial_wealth: Wealth at the start of a game.
            edge: Stored as ``self.edge``.
            max_wealth: Upper bound of the wealth observation and of the
                reward range; also sizes the action space.
            max_rounds: The rounds component of the observation has
                ``max_rounds + 1`` values.
        """
        # Bets are placed in penny increments, hence the factor of 100.
        penny_steps = int(max_wealth * 100)
        self.action_space = spaces.Discrete(penny_steps)

        wealth_space = spaces.Box(0, max_wealth, [1],
                                  dtype=np.float32)  # (w,b)
        rounds_space = spaces.Discrete(max_rounds + 1)
        self.observation_space = spaces.Tuple((wealth_space, rounds_space))

        self.reward_range = (0, max_wealth)
        self.edge = edge
        self.wealth = self.initial_wealth = initial_wealth
        self.max_rounds = max_rounds
        self.max_wealth = max_wealth

        # Placeholders for the attributes this constructor leaves unset
        # before handing over to seed() and reset().
        self.np_random = None
        self.rounds = None
        self.seed()
        self.reset()
Ejemplo n.º 11
0
 def __init__(self):
     """Seed the RNG, declare the spaces, build the digit table and reset.

     ``self.bogus_mnist`` becomes a ``(10, 6, 6)`` uint8 array holding the
     character codes of the module-level ``bogus_mnist`` glyph rows.
     """
     self.seed()
     self.viewer = None
     self.observation_space = spaces.Box(
         low=0, high=255, shape=(FIELD_H, FIELD_W, 3), dtype=np.uint8)
     self.action_space = spaces.Discrete(10)

     self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
     for digit in range(10):
         glyph_rows = bogus_mnist[digit]
         for y in range(6):
             # Each character of the row becomes its code point.
             self.bogus_mnist[digit, y, :] = list(map(ord, glyph_rows[y]))
     self.reset()
Ejemplo n.º 12
0
    def __init__(self):
        """Set the game limits, declare the spaces, then seed and reset.

        The action space is a single float in ``[-bounds, bounds]`` and the
        observation space has four discrete values.
        """
        # Randomly selected number is within +/- this value.
        self.range = 1000
        self.bounds = 10000

        upper = np.array([self.bounds])
        self.action_space = spaces.Box(low=-upper,
                                       high=upper,
                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)

        # Per-game bookkeeping starts at zero; guesses are capped at 200.
        self.number = self.guess_count = self.observation = 0
        self.guess_max = 200

        self.seed()
        self.reset()
Ejemplo n.º 13
0
    def __init__(self):
        """Set the game limits, declare the spaces, then seed and reset.

        The action space is a single float in ``[-bounds, bounds]`` and the
        observation space has four discrete values.
        """
        # +/- value the randomly selected number can be between.
        self.range = 1000
        # Action space bounds.
        self.bounds = 2000

        upper = np.array([self.bounds])
        self.action_space = spaces.Box(low=-upper,
                                       high=upper,
                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)

        # Per-game bookkeeping starts at zero; guesses are capped at 200.
        self.number = self.guess_count = self.observation = 0
        self.guess_max = 200

        self.seed()
        self.reset()
Ejemplo n.º 14
0
    def __init__(self):
        """Set position/speed limits, declare the spaces, then seed and reset.

        Observations are ``[position, speed]`` pairs bounded by ``self.low``
        and ``self.high``; there are three discrete actions.
        """
        self.min_position, self.max_position = -1.2, 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5

        # Speed is bounded symmetrically around zero.
        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.viewer = None

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(
            self.low, self.high, dtype=np.float32)

        self.seed()
        self.reset()
Ejemplo n.º 15
0
 def __init__(self, spots=37):
     """Size the action space from ``spots`` and seed the RNG.

     Args:
         spots: The action space has ``spots + 1`` discrete actions
             (NOTE(review): the extra action is presumably a non-betting
             move -- confirm against ``step``).
     """
     action_count = spots + 1
     self.n = action_count
     self.action_space = spaces.Discrete(action_count)
     # A single observation value: the observation carries no information.
     self.observation_space = spaces.Discrete(1)
     self.seed()