def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3,
             maxWealthAlpha=5.0, maxWealthM=200.0, maxRoundsMean=300.0,
             maxRoundsSD=25.0, reseed=True):
    """Create one game whose parameters are sampled from the given priors.

    Args:
        initialWealth: Starting wealth; stored as a float.
        edgePriorAlpha: First parameter of the beta draw for the coin edge.
        edgePriorBeta: Second parameter of the beta draw for the coin edge.
        maxWealthAlpha: First positional argument to ``genpareto.rvs`` for
            the wealth cap.
        maxWealthM: Second positional argument to ``genpareto.rvs`` for the
            wealth cap.
        maxRoundsMean: Mean of the normal draw for the number of rounds.
        maxRoundsSD: Standard deviation of the normal draw for the number
            of rounds.
        reseed: When true (or when the instance has no ``np_random``
            attribute yet), ``self.seed()`` is called at the end.
    """
    # Remember the hyperparameters so a reset can hand them back to
    # __init__() and the next game is governed by the same priors.
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initialWealth = float(initialWealth)
    self.edgePriorAlpha = edgePriorAlpha
    self.edgePriorBeta = edgePriorBeta
    self.maxWealthAlpha = maxWealthAlpha
    self.maxWealthM = maxWealthM
    self.maxRoundsMean = maxRoundsMean
    self.maxRoundsSD = maxRoundsSD

    # Sample this game's concrete parameters (edge, wealth cap, round cap),
    # in that order, from the shared generator.
    rng = prng.np_random
    edge = rng.beta(edgePriorAlpha, edgePriorBeta)
    pareto_draw = genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=rng)
    maxWealth = round(pareto_draw)
    rounds_draw = rng.normal(maxRoundsMean, maxRoundsSD)
    maxRounds = int(round(rounds_draw))

    # Sufficient statistics exposed to the agent:
    # - wealth cap (Pareto): alpha doesn't update, but x_m does, and it is
    #   simply the highest wealth seen to date;
    self.maxEverWealth = float(self.initialWealth)
    # - coin-flip edge: total wins and losses;
    self.wins = 0
    self.losses = 0
    # - number of rounds: how many rounds have been played so far.
    self.roundsElapsed = 0

    # Spaces and bookkeeping, derived from the sampled parameters.
    round_count_space_size = maxRounds + 1
    self.action_space = spaces.Discrete(int(maxWealth * 100))
    self.observation_space = spaces.Tuple((
        spaces.Box(0, maxWealth, shape=[1], dtype=np.float32),  # current wealth
        spaces.Discrete(round_count_space_size),  # rounds elapsed
        spaces.Discrete(round_count_space_size),  # wins
        spaces.Discrete(round_count_space_size),  # losses
        spaces.Box(0, maxWealth, shape=[1], dtype=np.float32),  # maximum observed wealth
    ))
    self.reward_range = (0, maxWealth)

    self.edge = edge
    self.wealth = self.initialWealth
    self.maxRounds = maxRounds
    self.rounds = self.maxRounds
    self.maxWealth = maxWealth

    needs_seed = reseed or not hasattr(self, 'np_random')
    if needs_seed:
        self.seed()
def __init__(self):
    """Set the physical limits, build the spaces and seed the environment.

    The action space is a single-element box bounded by +/- ``max_torque``;
    the observation space is a three-component box bounded by
    +/- ``[1, 1, max_speed]``.
    """
    self.viewer = None
    self.dt = .05
    self.max_speed = 8
    self.max_torque = 2.

    torque_limit = self.max_torque
    obs_limit = np.array([1., 1., self.max_speed])

    self.action_space = spaces.Box(
        low=-torque_limit,
        high=torque_limit,
        shape=(1, ),
        dtype=np.float32,
    )
    self.observation_space = spaces.Box(
        low=-obs_limit,
        high=obs_limit,
        dtype=np.float32,
    )

    self.seed()
def __init__(self):
    """Initialise the physical constants, spaces and episode state."""
    # Physical constants.
    self.gravity = 9.8
    self.masscart = 1.0
    self.masspole = 0.1
    self.total_mass = (self.masspole + self.masscart)
    self.length = 0.5  # this is really half of the pole's length
    self.polemass_length = (self.masspole * self.length)
    self.force_mag = 10.0
    self.tau = 0.02  # time in seconds between state updates
    self.kinematics_integrator = 'euler'

    # Failure thresholds: pole angle (radians) and cart position.
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 2.4

    # The position and angle bounds are twice the failure thresholds so
    # that the observation that ends the episode still lies inside the
    # observation space; the two remaining components are bounded only by
    # the float32 maximum.
    float32_max = np.finfo(np.float32).max
    obs_limit = np.array([
        self.x_threshold * 2,
        float32_max,
        self.theta_threshold_radians * 2,
        float32_max,
    ])

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Box(low=-obs_limit, high=obs_limit, dtype=np.float32)

    self.seed()

    # Nothing rendered and no episode in progress yet.
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    """Seed the environment, declare its spaces and reset it.

    Observations are ``uint8`` arrays of shape ``(FIELD_H, FIELD_W, 3)``
    with values in ``[0, 255]``; there are three discrete actions.
    """
    self.seed()
    self.viewer = None

    frame_shape = (FIELD_H, FIELD_W, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, dtype=np.uint8)
    self.action_space = spaces.Discrete(3)

    self.reset()
def __init__(self):
    """Declare the spaces, clear the state and seed the environment.

    The observation space is a symmetric six-component box: the first four
    components are bounded by +/- 1 and the last two by +/- ``MAX_VEL_1``
    and +/- ``MAX_VEL_2`` respectively. There are three discrete actions.
    """
    self.viewer = None
    self.state = None

    upper = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
    self.observation_space = spaces.Box(low=-upper, high=upper, dtype=np.float32)
    self.action_space = spaces.Discrete(3)

    self.seed()
def __init__(self):
    """Seed the environment, declare its spaces, build the digit table and reset.

    Observations are ``uint8`` arrays of shape ``(FIELD_H, FIELD_W, 3)``
    with values in ``[0, 255]``; there are ten discrete actions.
    """
    self.seed()
    self.viewer = None

    frame_shape = (FIELD_H, FIELD_W, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, dtype=np.uint8)
    self.action_space = spaces.Discrete(10)

    # Convert the module-level ``bogus_mnist`` text glyphs into a
    # (10, 6, 6) uint8 table holding the character code of every cell.
    glyph_table = np.zeros((10, 6, 6), dtype=np.uint8)
    self.bogus_mnist = glyph_table
    for digit in range(10):
        glyph_rows = bogus_mnist[digit]
        for row in range(6):
            glyph_table[digit, row, :] = [ord(symbol) for symbol in glyph_rows[row]]

    self.reset()
def __init__(self):
    """Initialise the physical constants, trial settings, spaces and state."""
    # Physical constants.
    self.gravity = 9.8
    self.masscart = 1.0
    self.masspole = 0.1
    self.total_mass = (self.masspole + self.masscart)
    self.length = 0.5  # this is really half of the pole's length
    self.polemass_length = (self.masspole * self.length)
    self.force_mag = 10.0
    self.tau = 0.02  # time in seconds between state updates

    # Trial bookkeeping.
    self.max_steps_per_trial = 1000
    self.step_counter = 0

    # World layout. The fixed failure thresholds of an earlier version
    # (theta_threshold_radians = 12 * 2 * math.pi / 360, x_threshold = 2.4)
    # are not set here; max_theta and world_width bound the observations.
    self.max_theta = np.pi
    self.world_width = 6
    self.x_goal = 1
    self.x_start = -2
    self.change = self.max_steps_per_trial

    # Position is bounded by the world width and the angle by twice
    # max_theta; the two remaining components are bounded only by the
    # float32 maximum.
    float32_max = np.finfo(np.float32).max
    obs_limit = np.array([
        self.world_width,
        float32_max,
        self.max_theta * 2,
        float32_max,
    ])

    # Two-component continuous action, bounded by [-10, 10] and [-1, 1].
    action_low = np.array([-10.0, -1.0])
    action_high = np.array([10.0, 1.0])
    self.action_space = spaces.Box(low=action_low, high=action_high, dtype=np.float32)
    self.observation_space = spaces.Box(low=-obs_limit, high=obs_limit, dtype=np.float32)

    self.seed()

    # Nothing rendered and no episode in progress yet.
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    """Set the action/position/speed limits, build the spaces, seed and reset.

    The action space is a single-element box over
    ``[min_action, max_action]``; the observation space is a two-component
    box over position and speed limits.
    """
    # Action limits.
    self.min_action = -1.0
    self.max_action = 1.0

    # Position and speed limits.
    self.min_position = -1.2
    self.max_position = 0.6
    self.max_speed = 0.07

    # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
    self.goal_position = 0.45
    self.power = 0.0015

    self.low_state = np.array([self.min_position, -self.max_speed])
    self.high_state = np.array([self.max_position, self.max_speed])

    self.viewer = None

    self.action_space = spaces.Box(
        low=self.min_action,
        high=self.max_action,
        shape=(1, ),
        dtype=np.float32,
    )
    self.observation_space = spaces.Box(
        low=self.low_state,
        high=self.high_state,
        dtype=np.float32,
    )

    self.seed()
    self.reset()
def __init__(self):
    """Configure the guessing range and spaces, then seed and reset.

    Actions are single-element boxes bounded by +/- ``bounds``;
    observations take one of four discrete values.
    """
    self.range = 1000  # the randomly selected number lies within +/- this value
    self.bounds = 10000

    action_limit = np.array([self.bounds])
    self.action_space = spaces.Box(low=-action_limit, high=action_limit, dtype=np.float32)
    self.observation_space = spaces.Discrete(4)

    # Per-game bookkeeping.
    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self.seed()
    self.reset()
def __init__(self):
    """Configure the guessing range and spaces, then seed and reset.

    Actions are single-element boxes bounded by +/- ``bounds``;
    observations take one of four discrete values.
    """
    self.range = 1000  # the randomly selected number lies within +/- this value
    self.bounds = 2000  # bounds of the action space

    action_limit = np.array([self.bounds])
    self.action_space = spaces.Box(low=-action_limit, high=action_limit, dtype=np.float32)
    self.observation_space = spaces.Discrete(4)

    # Per-game bookkeeping.
    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self.seed()
    self.reset()
def __init__(self):
    """Set the position/speed limits, build the spaces, seed and reset.

    There are three discrete actions; observations are two-component boxes
    bounded by the position and speed limits.
    """
    # Position and speed limits, and the goal position.
    self.min_position = -1.2
    self.max_position = 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5

    self.low = np.array([self.min_position, -self.max_speed])
    self.high = np.array([self.max_position, self.max_speed])

    self.viewer = None

    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(low=self.low, high=self.high, dtype=np.float32)

    self.seed()
    self.reset()
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):
    """Create a game with fixed parameters, then seed and reset it.

    Args:
        initialWealth: Starting wealth.
        edge: Value stored as ``self.edge``.
        maxWealth: Upper bound of the wealth observation and of the reward
            range; also sizes the action space.
        maxRounds: Round limit; the rounds observation has
            ``maxRounds + 1`` values.
    """
    # Bets are placed in penny increments, hence 100 actions per unit of
    # maximum wealth.
    penny_actions = int(maxWealth * 100)
    self.action_space = spaces.Discrete(penny_actions)

    wealth_space = spaces.Box(0, maxWealth, shape=[1], dtype=np.float32)  # (w,b)
    rounds_space = spaces.Discrete(maxRounds + 1)
    self.observation_space = spaces.Tuple((wealth_space, rounds_space))
    self.reward_range = (0, maxWealth)

    self.edge = edge
    self.wealth = initialWealth
    self.initialWealth = initialWealth
    self.maxRounds = maxRounds
    self.maxWealth = maxWealth

    self.seed()
    self.reset()