Ejemplo n.º 1
0
 def __init__(self, n=5, slip=0.2, small=2, large=10):
     """Store the chain parameters, declare the spaces and seed the RNG.

     Args:
         n: Size of the discrete observation space (presumably the
             chain length -- confirm against the step logic).
         slip: Probability of 'slipping' an action.
         small: Payout for the 'backwards' action.
         large: Payout at the end of the chain for the 'forwards' action.
     """
     self.n, self.slip = n, slip
     self.small, self.large = small, large
     # Two discrete actions and ``n`` discrete observations.
     self.observation_space = spaces.Discrete(n)
     self.action_space = spaces.Discrete(2)
     # Start at the beginning of the chain.
     self.state = 0
     self.seed()
Ejemplo n.º 2
0
    def __init__(self, nS, nA, P, isd):
        """Store the constructor arguments, build the spaces, seed and reset.

        Args:
            nS: Size of the discrete observation space.
            nA: Size of the discrete action space.
            P: Stored unchanged as ``self.P``.
            isd: Stored unchanged as ``self.isd``.
        """
        self.nS, self.nA = nS, nA
        self.P, self.isd = P, isd
        self.lastaction = None  # for rendering

        self.observation_space = spaces.Discrete(nS)
        self.action_space = spaces.Discrete(nA)

        # Seed first, then reset, so construction ends with a reset call.
        self.seed()
        self.reset()
Ejemplo n.º 3
0
    def __init__(self,
                 initialWealth=25.0,
                 edgePriorAlpha=7,
                 edgePriorBeta=3,
                 maxWealthAlpha=5.0,
                 maxWealthM=200.0,
                 maxRoundsMean=300.0,
                 maxRoundsSD=25.0,
                 reseed=True):
        """Draw one game's parameters from the given priors and build the spaces.

        Args:
            initialWealth: Starting wealth; stored as a float.
            edgePriorAlpha: First parameter of the beta draw for the edge.
            edgePriorBeta: Second parameter of the beta draw for the edge.
            maxWealthAlpha: First positional argument to ``genpareto.rvs``.
            maxWealthM: Second positional argument to ``genpareto.rvs``.
            maxRoundsMean: Mean of the normal draw for the round limit.
            maxRoundsSD: Standard deviation of that normal draw.
            reseed: When true (or when the instance has no ``np_random``
                attribute yet), ``self.seed()`` is called at the end.
        """
        # Keep the hyperparameters on the instance so they can be passed back
        # into __init__() during resets and the same hyperparameters govern
        # the next game's parameters, as the user expects.
        # TODO: this is boilerplate, is there any more elegant way to do this?
        self.initialWealth = float(initialWealth)
        self.edgePriorAlpha, self.edgePriorBeta = edgePriorAlpha, edgePriorBeta
        self.maxWealthAlpha, self.maxWealthM = maxWealthAlpha, maxWealthM
        self.maxRoundsMean, self.maxRoundsSD = maxRoundsMean, maxRoundsSD

        # Draw this game's parameters. The draws stay in this order (edge,
        # wealth cap, round limit) because they consume the shared generator.
        sampled_edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
        pareto_draw = genpareto.rvs(maxWealthAlpha,
                                    maxWealthM,
                                    random_state=prng.np_random)
        wealth_cap = round(pareto_draw)
        normal_draw = prng.np_random.normal(maxRoundsMean, maxRoundsSD)
        round_limit = int(round(normal_draw))

        # Sufficient statistic for the Pareto distribution on the wealth cap:
        # alpha doesn't update, but x_m does, and it is simply the highest
        # wealth seen to date.
        self.maxEverWealth = float(self.initialWealth)
        # For the coinflip edge the statistic is total wins/losses.
        self.wins = 0
        self.losses = 0
        # For the number of rounds we remember how many have been played.
        self.roundsElapsed = 0

        # The rest proceeds as before.
        self.action_space = spaces.Discrete(int(wealth_cap * 100))
        current_wealth = spaces.Box(0, wealth_cap, shape=[1], dtype=np.float32)
        rounds_elapsed = spaces.Discrete(round_limit + 1)
        win_count = spaces.Discrete(round_limit + 1)
        loss_count = spaces.Discrete(round_limit + 1)
        max_observed_wealth = spaces.Box(0, wealth_cap, [1], dtype=np.float32)
        self.observation_space = spaces.Tuple((
            current_wealth,
            rounds_elapsed,
            win_count,
            loss_count,
            max_observed_wealth,
        ))
        self.reward_range = (0, wealth_cap)
        self.edge = sampled_edge
        self.wealth = self.initialWealth
        self.maxRounds = round_limit
        self.rounds = self.maxRounds
        self.maxWealth = wealth_cap
        if reseed or not hasattr(self, 'np_random'):
            self.seed()
Ejemplo n.º 4
0
    def __init__(self, natural=False):
        """Declare the spaces, seed the RNG and start the first game.

        Args:
            natural: Flag to pay out 1.5 on a "natural" blackjack win, like
                casino rules.
                Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        """
        observation_parts = (
            spaces.Discrete(32),
            spaces.Discrete(11),
            spaces.Discrete(2),
        )
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple(observation_parts)
        self.seed()

        # The flag is stored before the reset that starts the first game.
        self.natural = natural
        self.reset()
Ejemplo n.º 5
0
    def __init__(self):
        """Set the cart and pole constants, the failure thresholds and the spaces."""
        # Physical constants.
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.length = 0.5  # actually half the pole's length
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'

        # Quantities derived from the constants above.
        self.total_mass = self.masspole + self.masscart
        self.polemass_length = self.masspole * self.length

        # Thresholds at which to fail the episode.
        self.x_threshold = 2.4
        self.theta_threshold_radians = 12 * 2 * math.pi / 360

        # The angle limit is 2 * theta_threshold_radians so that a failing
        # observation is still within bounds; the second and fourth entries
        # are bounded only by the float32 maximum.
        float32_max = np.finfo(np.float32).max
        high = np.array([
            self.x_threshold * 2,
            float32_max,
            self.theta_threshold_radians * 2,
            float32_max,
        ])

        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        self.action_space = spaces.Discrete(2)

        self.seed()
        self.state = None
        self.viewer = None

        self.steps_beyond_done = None
Ejemplo n.º 6
0
    def __init__(self):
        """Seed the RNG, declare the image observation space and reset."""
        self.seed()
        self.viewer = None

        # Observations are uint8 arrays of shape (FIELD_H, FIELD_W, 3)
        # with values between 0 and 255.
        frame_shape = (FIELD_H, FIELD_W, 3)
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(0, 255, frame_shape, dtype=np.uint8)

        self.reset()
Ejemplo n.º 7
0
 def __init__(self):
     """Declare the symmetric observation bounds and three discrete actions."""
     self.viewer = None
     self.state = None
     # The first four entries are bounded by 1.0, the last two by the
     # MAX_VEL_1 / MAX_VEL_2 attributes; the lower bound mirrors the upper.
     bound = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
     self.action_space = spaces.Discrete(3)
     self.observation_space = spaces.Box(low=-bound,
                                         high=bound,
                                         dtype=np.float32)
     self.seed()
Ejemplo n.º 8
0
    def __init__(self,
                 initialWealth=25.0,
                 edge=0.6,
                 maxWealth=250.0,
                 maxRounds=300):
        """Build the spaces from the game parameters, then seed and reset.

        Args:
            initialWealth: Stored as both ``wealth`` and ``initialWealth``.
            edge: Stored unchanged as ``self.edge``.
            maxWealth: Upper bound of the wealth box and of ``reward_range``.
            maxRounds: The round counter space has ``maxRounds + 1`` values.
        """
        # Betting in penny increments.
        penny_bets = int(maxWealth * 100)
        self.action_space = spaces.Discrete(penny_bets)

        wealth_box = spaces.Box(0, maxWealth, [1], dtype=np.float32)  # (w,b)
        round_counter = spaces.Discrete(maxRounds + 1)
        self.observation_space = spaces.Tuple((wealth_box, round_counter))

        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.initialWealth = initialWealth
        self.wealth = initialWealth
        self.maxWealth = maxWealth
        self.maxRounds = maxRounds

        self.seed()
        self.reset()
Ejemplo n.º 9
0
 def __init__(self):
     """Seed the RNG, declare the spaces, build the glyph array and reset."""
     self.seed()
     self.viewer = None
     frame_shape = (FIELD_H, FIELD_W, 3)
     self.observation_space = spaces.Box(0, 255, frame_shape, dtype=np.uint8)
     self.action_space = spaces.Discrete(10)

     # Convert the module-level ``bogus_mnist`` rows into a (10, 6, 6) uint8
     # array holding the character code of every cell.
     glyphs = np.zeros((10, 6, 6), dtype=np.uint8)
     self.bogus_mnist = glyphs
     for digit in range(10):
         for row in range(6):
             glyphs[digit, row, :] = [ord(ch) for ch in bogus_mnist[digit][row]]
     self.reset()
Ejemplo n.º 10
0
    def __init__(self):
        """Set the guessing limits, declare the spaces, then seed and reset."""
        # The randomly selected number is within +/- this value.
        self.range = 1000
        self.bounds = 10000

        # Actions are single float32 values in [-bounds, bounds].
        limit = np.array([self.bounds])
        self.action_space = spaces.Box(low=-limit,
                                       high=limit,
                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)

        self.number, self.observation = 0, 0
        self.guess_count, self.guess_max = 0, 200

        self.seed()
        self.reset()
Ejemplo n.º 11
0
    def __init__(self):
        """Set the guessing limits, declare the spaces, then seed and reset."""
        # The randomly selected number lies within +/- this value.
        self.range = 1000
        # Action space bounds.
        self.bounds = 2000

        self.observation_space = spaces.Discrete(4)
        # Actions are single float32 values in [-bounds, bounds].
        limit = np.array([self.bounds])
        self.action_space = spaces.Box(low=-limit,
                                       high=limit,
                                       dtype=np.float32)

        self.number, self.observation = 0, 0
        self.guess_count, self.guess_max = 0, 200

        self.seed()
        self.reset()
Ejemplo n.º 12
0
    def __init__(self):
        """Set the position/speed limits, declare the spaces, seed and reset."""
        self.min_position, self.max_position = -1.2, 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5

        self.viewer = None

        # Observation bounds are (position, speed) pairs; the speed bound is
        # symmetric around zero.
        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.observation_space = spaces.Box(self.low,
                                            self.high,
                                            dtype=np.float32)
        self.action_space = spaces.Discrete(3)

        self.seed()
        self.reset()
Ejemplo n.º 13
0
 def __init__(self, spots=37):
     """Declare ``spots + 1`` discrete actions and a single observation.

     Args:
         spots: One less than the number of discrete actions.
     """
     action_count = spots + 1
     self.n = action_count
     self.observation_space = spaces.Discrete(1)
     self.action_space = spaces.Discrete(action_count)
     self.seed()