def test_subtraction(self):
    space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
    # `rewarded` should be a Subspace, but this is not implemented yet
    rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)
    r1 = ConstantReward(space, 1, rewarded_set=rewarded)
    r2 = ConstantReward(space, 1, rewarded_set=rewarded)
    reward = r1 - r2
    total = 0
    for t in range(100):
        s, a = space.get_tuple(space.sample())
        total += reward.get_reward(s, a, space.state_space.sample(), False)
    self.assertEqual(total, 0)
def test_product_of_boxes(self):
    b1 = Box([0, 0], [1, 1], shape=(3, 3))
    b2 = Box(4, 5, shape=(11, 11))
    p = ProductSpace(b1, b2)
    x = p.sample()
    self.assertTrue(x in p)
    for i, v in iter(p):
        self.assertTrue(p[i] in p)
        self.assertTrue(np.all(v == p[i]))
        self.assertEqual(i, p.get_index_of(p[i]))
def test_unrewarded(self):
    space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
    # `rewarded` should be a Subspace, but this is not implemented yet
    rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)
    unrewarded = StateActionSpace(*Box([0.5, 0.5], [1, 1], (10, 10)).sets)
    reward = ConstantReward(space, 10, unrewarded_set=unrewarded)
    total = 0
    for t in range(10):
        s, a = space.get_tuple(space.sample())
        sampled_space = (rewarded.state_space if t % 2 == 0
                         else unrewarded.state_space)
        total += reward.get_reward(s, a, sampled_space.sample(), False)
    self.assertEqual(total, 50)
def test_constant(self):
    space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
    reward = ConstantReward(space, 10)
    total = 0
    for t in range(10):
        s, a = space.get_tuple(space.sample())
        total += reward.get_reward(s, a, space.state_space.sample(), False)
    self.assertEqual(total, 100)
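# Summary of the ConstantReward behavior exercised by the tests above (as
# inferred from these tests, not from separate documentation): with neither
# rewarded_set nor unrewarded_set, every transition earns the constant; with
# rewarded_set, only transitions landing in that set earn it; with
# unrewarded_set, every transition outside that set earns it. Reward objects
# also compose arithmetically, e.g. `r1 - r2` in test_subtraction yields a
# reward whose value is the pointwise difference of its operands.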
def test_box_2(self):
    box = Box([0, 0], [1, 1], shape=(3, 3))
    values_1d = [0, 0.5, 1]
    values = [[x, y] for x in values_1d for y in values_1d]
    x = box.sample()
    self.assertTrue(list(x) in values)
    self.assertTrue(x in box)
    self.assertTrue(Space.element([0, 0]) in box)
    self.assertTrue(Space.element(0.5, 0.25) in box)
    self.assertFalse(box.is_on_grid(Space.element(0.5, 0.25)))
    self.assertTrue(Space.element(-1, 0) not in box)

    idx = box.sample_idx()
    indexes_1d = (0, 1, 2)
    indexes = [[i, j] for i in indexes_1d for j in indexes_1d]
    self.assertTrue(isinstance(idx, tuple))
    self.assertEqual(len(idx), 2)
    self.assertTrue(list(idx) in indexes)

    for i, v in iter(box):
        self.assertTrue(list(box[i]) in values)
        self.assertTrue(box[i] in box)
        self.assertTrue(np.all(v == box[i]))
        self.assertEqual(i, box.get_index_of(box[i]))
def __init__(self, ground_gravity, gravity_gradient, control_frequency,
             max_thrust, max_altitude, shape=(200, 150)):
    stateaction_space = StateActionSpace.from_product(
        Box([0, 0], [max_altitude, max_thrust], shape)
    )
    super(HovershipDynamics, self).__init__(stateaction_space)
    self.ground_gravity = ground_gravity
    self.gravity_gradient = gravity_gradient
    self.control_frequency = control_frequency
    self.ceiling_value = stateaction_space.state_space.high
    self.max_thrust = stateaction_space.action_space.high
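# Illustrative sketch (not part of the original source): constructing the
# dynamics directly. The values mirror the `default_dynamics_parameters`
# used by the Hovership environment below.
dynamics = HovershipDynamics(ground_gravity=0.1,
                             gravity_gradient=1,
                             control_frequency=1,
                             max_thrust=0.8,
                             max_altitude=2,
                             shape=(200, 150))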
def __init__(self, random_start=False, default_initial_state=None,
             dynamics_parameters=None, reward=None,
             reward_done_threshold=None, steps_done_threshold=None,
             goal_state=False):
    if dynamics_parameters is None:
        dynamics_parameters = {}
    default_dynamics_parameters = {
        'ground_gravity': 0.1,
        'gravity_gradient': 1,
        'control_frequency': 1,
        'max_thrust': 0.8,
        'max_altitude': 2,
        'shape': (200, 150),
    }
    default_dynamics_parameters.update(dynamics_parameters)
    dynamics = HovershipDynamics(**default_dynamics_parameters)
    if reward is None:
        # The default reward gives a reward of 1 when the agent is above
        # 80% of the ceiling value
        max_altitude = default_dynamics_parameters['max_altitude']
        max_thrust = default_dynamics_parameters['max_thrust']
        rewarded_set = StateActionSpace.from_product(
            Box([0.8 * max_altitude, 0],
                [max_altitude, max_thrust],
                (100, 100))
        )
        reward = ConstantReward(dynamics.stateaction_space, constant=1,
                                rewarded_set=rewarded_set)
    if default_initial_state is None:
        # The default initial state is unrewarded with the default reward
        max_altitude = default_dynamics_parameters['max_altitude']
        default_initial_state = atleast_1d(0.75 * max_altitude)
    super(Hovership, self).__init__(
        dynamics=dynamics,
        reward=reward,
        default_initial_state=default_initial_state,
        random_start=random_start,
        reward_done_threshold=reward_done_threshold,
        steps_done_threshold=steps_done_threshold,
    )
    self.goal_state = goal_state
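# Illustrative sketch (an assumption, not taken from the original source):
# overriding a subset of the dynamics parameters at construction. Unspecified
# keys keep their defaults, since `default_dynamics_parameters` is updated
# with the user-supplied dict.
env = Hovership(random_start=True,
                dynamics_parameters={'max_altitude': 3, 'max_thrust': 1.0})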
def __init__(self, gravity, mass, stiffness, resting_length, energy,
             failed=False, state_bounds=(0.0, 1.0),
             action_bounds=(-1 / 18 * np.pi, 7 / 18 * np.pi),
             shape=(200, 100)):
    stateaction_space = StateActionSpace.from_product(
        Box([state_bounds[0], action_bounds[0]],
            [state_bounds[1], action_bounds[1]],
            shape)
    )
    super(SlipDynamics, self).__init__(stateaction_space)
    self.gravity = gravity
    self.mass = mass
    self.stiffness = stiffness
    self.resting_length = resting_length
    self.energy = energy
    self.failed = failed
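# Illustrative sketch of constructing the SLIP dynamics. The physical
# constants below are hypothetical placeholders, not values taken from the
# original source; only the keyword names come from the signature above.
slip = SlipDynamics(gravity=9.81,
                    mass=80.0,
                    stiffness=8200.0,
                    resting_length=1.0,
                    energy=1800.0)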
def test_slicing(self):
    tolerance = 1e-7

    def assertClose(x, y):
        self.assertTrue(np.all(np.abs(x - y) < tolerance))

    # With only 10 points, the slice 25:75:2 would be empty and the check
    # vacuous; use 100 points so the strided slice is actually exercised
    s = Segment(0, 1, 100)
    u = np.linspace(0, 1, 100).reshape((-1, 1))
    t = s[:]
    assertClose(t, u)
    t = s[25:75:2]
    assertClose(t, u[25:75:2])

    s = Box(0, 1, (2, 2))
    ux = np.linspace(0, 1, 2)
    uy = np.linspace(0, 1, 2)
    u = np.dstack(np.meshgrid(ux, uy, indexing='ij'))
    t = s[:, :]
    assertClose(t, u)
    t = s[0, :]
    assertClose(t, u[0, :])
    t = s[0]
    assertClose(t, u[0, :])
    t = s[:, 0]
    assertClose(t, u[:, 0])
    t = s[0, 1]
    assertClose(t, u[0, 1])
    t = s[np.array([0.15]), :]
    assertClose(t[:, 1], uy)
def test_indexing_involution(self):
    b = Box(0, 1, (2, 2))
    self.assertTrue((b[0, 0] == b[b[0, 0]]).all())
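# The involution above relies on Box.__getitem__ accepting both index tuples
# and elements (value-based indexing also appears in test_slicing): b[0, 0]
# returns the grid point at index (0, 0), and indexing with an element maps
# it back to its grid point, so applying the operation twice is a no-op.
# On the (2, 2) grid over [0, 1] x [0, 1]:
#   b[0, 0]     -> array([0., 0.])
#   b[b[0, 0]]  -> array([0., 0.])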