def __init__(self, n=5, slip=0.2, small=2, large=10):
    """Configure the chain and its action/observation spaces.

    Args:
        n: number of states; also the size of the observation space.
        slip: probability of 'slipping' an action.
        small: payout for the 'backwards' action.
        large: payout at the end of the chain for the 'forwards' action.
    """
    # Dynamics and reward parameters.
    self.n, self.slip = n, slip
    self.small, self.large = small, large

    # Start at the beginning of the chain.
    self.state = 0

    # Two possible actions; one discrete observation per chain state.
    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Discrete(self.n)

    self.seed()
def __init__(self, nS, nA, P, isd):
    """Store the model description and build discrete spaces from it.

    Args:
        nS: size of the discrete observation space.
        nA: size of the discrete action space.
        P: stored unchanged as ``self.P`` (presumably the transition
            table -- confirm against the methods that read it).
        isd: stored unchanged as ``self.isd`` (presumably the initial
            state distribution -- confirm against ``reset``).
    """
    self.P, self.isd = P, isd
    self.lastaction = None  # for rendering

    self.nS, self.nA = nS, nA
    self.action_space = spaces.Discrete(self.nA)
    self.observation_space = spaces.Discrete(self.nS)

    # Seed first, then put the environment into its initial state.
    self.seed()
    self.reset()
def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3,
             maxWealthAlpha=5.0, maxWealthM=200.0, maxRoundsMean=300.0,
             maxRoundsSD=25.0, reseed=True):
    """Draw one game's parameters from the given priors and set up spaces.

    Args:
        initialWealth: starting wealth; stored as a float.
        edgePriorAlpha: first argument of the beta draw for the edge.
        edgePriorBeta: second argument of the beta draw for the edge.
        maxWealthAlpha: first argument passed to ``genpareto.rvs``.
        maxWealthM: second argument passed to ``genpareto.rvs``.
        maxRoundsMean: mean of the normal draw for the round limit.
        maxRoundsSD: standard deviation of that normal draw.
        reseed: when true (or when ``self.np_random`` does not exist yet)
            ``self.seed()`` is called at the end of construction.
    """
    # Keep the hyperparameters so they can be passed back into __init__()
    # during resets and govern the next game's parameters as well.
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initialWealth = float(initialWealth)
    self.edgePriorAlpha, self.edgePriorBeta = edgePriorAlpha, edgePriorBeta
    self.maxWealthAlpha, self.maxWealthM = maxWealthAlpha, maxWealthM
    self.maxRoundsMean, self.maxRoundsSD = maxRoundsMean, maxRoundsSD

    # Draw this game's parameters. The order of the three draws is kept
    # fixed because they all consume the same shared random stream.
    rng = prng.np_random
    edge = rng.beta(edgePriorAlpha, edgePriorBeta)
    wealth_cap_draw = genpareto.rvs(maxWealthAlpha, maxWealthM,
                                    random_state=rng)
    maxWealth = round(wealth_cap_draw)
    round_limit_draw = rng.normal(maxRoundsMean, maxRoundsSD)
    maxRounds = int(round(round_limit_draw))

    # Sufficient statistic for the Pareto distribution on the wealth cap:
    # alpha doesn't update, but x_m does, and is simply the highest wealth
    # seen to date.
    self.maxEverWealth = float(self.initialWealth)
    # For the coinflip edge the statistic is total wins/losses.
    self.wins = 0
    self.losses = 0
    # For the number of rounds, remember how many rounds were played.
    self.roundsElapsed = 0

    self.action_space = spaces.Discrete(int(maxWealth * 100))

    # Three independent counters (rounds elapsed, wins, losses), each
    # ranging over 0..maxRounds.
    rounds_elapsed_space, wins_space, losses_space = [
        spaces.Discrete(maxRounds + 1) for _ in range(3)
    ]
    current_wealth_space = spaces.Box(0, maxWealth, shape=[1],
                                      dtype=np.float32)
    max_observed_wealth_space = spaces.Box(0, maxWealth, [1],
                                           dtype=np.float32)
    self.observation_space = spaces.Tuple((
        current_wealth_space,
        rounds_elapsed_space,
        wins_space,
        losses_space,
        max_observed_wealth_space,
    ))
    self.reward_range = (0, maxWealth)

    # Per-game state derived from the draws above.
    self.edge = edge
    self.wealth = self.initialWealth
    self.maxRounds = maxRounds
    self.rounds = self.maxRounds
    self.maxWealth = maxWealth

    needs_seed = reseed or not hasattr(self, 'np_random')
    if needs_seed:
        self.seed()
def __init__(self, natural=False):
    """Build the spaces, seed the environment and start the first game.

    Args:
        natural: flag to pay out 1.5 on a "natural" blackjack win, like
            casino rules.
            Ref: http://www.bicyclecards.com/how-to-play/blackjack/
    """
    self.action_space = spaces.Discrete(2)

    # Observation is a triple of discrete components with these sizes.
    component_sizes = (32, 11, 2)
    self.observation_space = spaces.Tuple(
        tuple(spaces.Discrete(size) for size in component_sizes))

    self.seed()

    self.natural = natural

    # Start the first game.
    self.reset()
def __init__(self):
    """Set the physical constants, thresholds and spaces of the system."""
    # Physical constants.
    self.gravity = 9.8
    self.masscart = 1.0
    self.masspole = 0.1
    self.total_mass = (self.masspole + self.masscart)
    self.length = 0.5  # actually half the pole's length
    self.polemass_length = (self.masspole * self.length)
    self.force_mag = 10.0

    # Integration settings.
    self.tau = 0.02  # seconds between state updates
    self.kinematics_integrator = 'euler'

    # Failure thresholds: the pole angle (in radians) and the
    # x_threshold value at which the episode fails.
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 2.4

    # Observation bounds. The bounded entries are twice their failure
    # threshold so that a failing observation is still within bounds; the
    # other two entries are limited only by the largest float32.
    float32_max = np.finfo(np.float32).max
    obs_limit = np.array([
        self.x_threshold * 2,
        float32_max,
        self.theta_threshold_radians * 2,
        float32_max,
    ])

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Box(-obs_limit, obs_limit,
                                        dtype=np.float32)

    self.seed()

    # Nothing is rendered or simulated until later calls populate these.
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    """Seed the environment, declare image/action spaces, then reset."""
    self.seed()
    self.viewer = None

    # Observations are FIELD_H x FIELD_W arrays with 3 channels of uint8
    # values in [0, 255].
    frame_shape = (FIELD_H, FIELD_W, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape,
                                        dtype=np.uint8)
    self.action_space = spaces.Discrete(3)

    self.reset()
def __init__(self):
    """Declare symmetric observation bounds and a 3-way action space."""
    self.viewer = None

    # The first four observation components are bounded by 1.0 and the
    # last two by the class's MAX_VEL_1 / MAX_VEL_2; the lower bound is
    # the negated upper bound.
    upper = np.array([1.0] * 4 + [self.MAX_VEL_1, self.MAX_VEL_2])
    lower = -upper
    self.observation_space = spaces.Box(low=lower, high=upper,
                                        dtype=np.float32)
    self.action_space = spaces.Discrete(3)

    self.state = None
    self.seed()
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0,
             maxRounds=300):
    """Set up a betting game with fixed, caller-supplied parameters.

    Args:
        initialWealth: wealth at the start; stored as both ``wealth`` and
            ``initialWealth``.
        edge: stored as ``self.edge``.
        maxWealth: upper bound of the wealth box and of the reward range.
        maxRounds: the discrete round component has ``maxRounds + 1``
            values.
    """
    # Betting is in penny increments, hence 100 actions per unit of wealth.
    penny_bet_count = int(maxWealth * 100)
    self.action_space = spaces.Discrete(penny_bet_count)

    wealth_space = spaces.Box(0, maxWealth, [1], dtype=np.float32)  # (w,b)
    rounds_space = spaces.Discrete(maxRounds + 1)
    self.observation_space = spaces.Tuple((wealth_space, rounds_space))
    self.reward_range = (0, maxWealth)

    # Game parameters and starting wealth.
    self.edge = edge
    self.wealth = self.initialWealth = initialWealth
    self.maxRounds, self.maxWealth = maxRounds, maxWealth

    self.seed()
    self.reset()
def __init__(self):
    """Seed, declare spaces, build the digit glyph table, then reset."""
    self.seed()
    self.viewer = None

    # Observations are FIELD_H x FIELD_W arrays with 3 channels of uint8
    # values in [0, 255]; there are ten possible actions.
    self.observation_space = spaces.Box(low=0, high=255,
                                        shape=(FIELD_H, FIELD_W, 3),
                                        dtype=np.uint8)
    self.action_space = spaces.Discrete(10)

    # Convert the module-level `bogus_mnist` text glyphs into a
    # (digit, row, column) uint8 array holding each character's code point.
    glyphs = np.zeros((10, 6, 6), dtype=np.uint8)
    for digit in range(10):
        rows = bogus_mnist[digit]
        for y in range(6):
            glyphs[digit, y, :] = [ord(char) for char in rows[y]]
    self.bogus_mnist = glyphs

    self.reset()
def __init__(self):
    """Initialise limits, spaces and counters, then seed and reset."""
    self.range = 1000  # Randomly selected number is within +/- this value
    self.bounds = 10000

    # A single continuous action in [-bounds, bounds].
    upper = np.array([self.bounds])
    self.action_space = spaces.Box(low=-upper, high=upper, dtype=np.float32)
    self.observation_space = spaces.Discrete(4)

    # Bookkeeping, all starting from zero; guess_max is fixed at 200.
    self.number = self.guess_count = self.observation = 0
    self.guess_max = 200

    self.seed()
    self.reset()
def __init__(self):
    """Initialise limits, spaces and counters, then seed and reset."""
    self.range = 1000  # the randomly selected number lies within +/- this
    self.bounds = 2000  # action space bounds

    # A single continuous action in [-bounds, bounds].
    upper = np.array([self.bounds])
    self.action_space = spaces.Box(low=-upper, high=upper, dtype=np.float32)
    self.observation_space = spaces.Discrete(4)

    # Bookkeeping, all starting from zero; guess_max is fixed at 200.
    self.number = self.guess_count = self.observation = 0
    self.guess_max = 200

    self.seed()
    self.reset()
def __init__(self):
    """Set position/speed limits, build the spaces, then seed and reset."""
    # Position and speed limits, plus the goal position.
    self.min_position, self.max_position = -1.2, 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5

    # Observation bounds: a position entry and a speed entry, with the
    # speed range symmetric about zero.
    self.low = np.array([self.min_position, -self.max_speed])
    self.high = np.array([self.max_position, self.max_speed])

    self.viewer = None

    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(low=self.low, high=self.high,
                                        dtype=np.float32)

    self.seed()
    self.reset()
def __init__(self, spots=37):
    """Create the action/observation spaces and seed the environment.

    Args:
        spots: the action space has ``spots + 1`` discrete actions.
    """
    action_count = spots + 1
    self.n = action_count
    self.action_space = spaces.Discrete(self.n)
    # The observation space has a single possible value.
    self.observation_space = spaces.Discrete(1)
    self.seed()