Example #1
0
    def test_substraction(self):
        """Subtracting a reward from an identical one yields zero everywhere."""
        space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
        # NOTE: `rewarded_set` should ideally be a Subspace, but that is
        # not implemented yet, so a full StateActionSpace is used instead.
        rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)

        first = ConstantReward(space, 1, rewarded_set=rewarded)
        second = ConstantReward(space, 1, rewarded_set=rewarded)
        difference = first - second

        total = 0
        for _ in range(100):
            state, action = space.get_tuple(space.sample())
            total += difference.get_reward(
                state, action, space.state_space.sample(), False)
        # The two constants cancel on every sampled transition.
        self.assertEqual(total, 0)
Example #2
0
    def test_product_of_boxes(self):
        """Sampling and index round-trips work on a product of two boxes."""
        first = Box([0, 0], [1, 1], shape=(3, 3))
        second = Box(4, 5, shape=(11, 11))
        product = ProductSpace(first, second)

        # A sampled element must belong to the space it was drawn from.
        self.assertTrue(product.sample() in product)

        for index, value in iter(product):
            # Indexing back into the space stays inside it ...
            self.assertTrue(product[index] in product)
            # ... reproduces the iterated value ...
            self.assertTrue(np.all(value == product[index]))
            # ... and get_index_of inverts indexing.
            self.assertEqual(index, product.get_index_of(product[index]))
Example #3
0
    def test_unrewarded(self):
        """States in the unrewarded set earn 0; all others earn the constant."""
        space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
        # NOTE: these should ideally be Subspaces, but that is not
        # implemented yet, so full StateActionSpaces are used instead.
        rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)
        unrewarded = StateActionSpace(*Box([0.5, 0.5], [1, 1], (10, 10)).sets)

        reward = ConstantReward(space, 10, unrewarded_set=unrewarded)

        total = 0
        for step in range(10):
            state, action = space.get_tuple(space.sample())
            # Alternate regions: even steps draw the next state from the
            # rewarded region, odd steps from the unrewarded one.
            if step % 2 == 0:
                sampled_space = rewarded.state_space
            else:
                sampled_space = unrewarded.state_space
            total += reward.get_reward(
                state, action, sampled_space.sample(), False)
        # 5 of the 10 transitions land outside the unrewarded set: 5 * 10.
        self.assertEqual(total, 50)
Example #4
0
    def test_constant(self):
        """A ConstantReward returns its constant for every transition."""
        space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
        reward = ConstantReward(space, 10)

        collected = []
        for _ in range(10):
            state, action = space.get_tuple(space.sample())
            collected.append(
                reward.get_reward(state, action,
                                  space.state_space.sample(), False))
        # 10 transitions at 10 reward each.
        self.assertEqual(sum(collected), 100)
Example #5
0
    def test_box_2(self):
        """Exercise sampling, membership and indexing of a 2-D Box grid."""
        box = Box([0, 0], [1, 1], shape=(3, 3))
        axis_values = [0, 0.5, 1]
        grid_points = [[a, b] for a in axis_values for b in axis_values]

        # A sample lands on one of the 9 grid points and inside the box.
        sample = box.sample()
        self.assertTrue(list(sample) in grid_points)
        self.assertTrue(sample in box)

        # Membership is continuous; grid membership is discrete.
        self.assertTrue(Space.element([0, 0]) in box)
        self.assertTrue(Space.element(0.5, 0.25) in box)
        self.assertTrue(not box.is_on_grid(Space.element(0.5, 0.25)))
        self.assertTrue(Space.element(-1, 0) not in box)

        # A sampled index is a 2-tuple of per-axis indexes in {0, 1, 2}.
        index = box.sample_idx()
        axis_indexes = (0, 1, 2)
        index_pairs = [[i, j] for i in axis_indexes for j in axis_indexes]
        self.assertTrue(isinstance(index, tuple))
        self.assertEqual(len(index), 2)
        self.assertTrue(list(index) in index_pairs)

        # Iteration yields (index, value) pairs that round-trip exactly.
        for idx, value in iter(box):
            self.assertTrue(list(box[idx]) in grid_points)
            self.assertTrue(box[idx] in box)
            self.assertTrue(np.all(value == box[idx]))
            self.assertEqual(idx, box.get_index_of(box[idx]))
Example #6
0
 def __init__(self,
              ground_gravity,
              gravity_gradient,
              control_frequency,
              max_thrust,
              max_altitude,
              shape=(200, 150)):
     """Initialize the hovership dynamics.

     The state-action space is the product box
     [0, max_altitude] x [0, max_thrust], discretized on `shape`.

     :param ground_gravity: gravity parameter — presumably the pull felt
         near the ground; stored as-is, semantics defined by the step
         function (not visible here)
     :param gravity_gradient: presumably how gravity varies with
         altitude — TODO confirm against the step function
     :param control_frequency: control parameter stored as-is
     :param max_thrust: upper bound of the action (thrust) axis
     :param max_altitude: upper bound of the state (altitude) axis
     :param shape: discretization of (state, action) axes
     """
     stateaction_space = StateActionSpace.from_product(
         Box([0, 0], [max_altitude, max_thrust], shape))
     super(HovershipDynamics, self).__init__(stateaction_space)
     self.ground_gravity = ground_gravity
     self.gravity_gradient = gravity_gradient
     self.control_frequency = control_frequency
     # Bounds are read back from the constructed space rather than from
     # the raw arguments (so they reflect any processing done by Box).
     self.ceiling_value = stateaction_space.state_space.high
     self.max_thrust = stateaction_space.action_space.high
    def __init__(self,
                 random_start=False,
                 default_initial_state=None,
                 dynamics_parameters=None,
                 reward=None,
                 reward_done_threshold=None,
                 steps_done_threshold=None,
                 goal_state=False):
        """Set up the Hovership environment with sensible defaults.

        :param random_start: forwarded to the parent environment
        :param default_initial_state: starting state; defaults to 75% of
            the maximum altitude (unrewarded under the default reward)
        :param dynamics_parameters: dict overriding any of the default
            HovershipDynamics parameters
        :param reward: reward object; defaults to a ConstantReward of 1
            above 80% of the ceiling
        :param reward_done_threshold: forwarded to the parent environment
        :param steps_done_threshold: forwarded to the parent environment
        :param goal_state: stored as an attribute on the environment
        """
        params = {
            'ground_gravity': 0.1,
            'gravity_gradient': 1,
            'control_frequency': 1,
            'max_thrust': 0.8,
            'max_altitude': 2,
            'shape': (200, 150),
        }
        if dynamics_parameters is not None:
            params.update(dynamics_parameters)
        dynamics = HovershipDynamics(**params)

        max_altitude = params['max_altitude']
        if reward is None:
            # Default reward: 1 whenever the agent is above 80% of the
            # ceiling value, for any thrust.
            rewarded_set = StateActionSpace.from_product(
                Box([0.8 * max_altitude, 0],
                    [max_altitude, params['max_thrust']],
                    (100, 100)))
            reward = ConstantReward(dynamics.stateaction_space,
                                    constant=1,
                                    rewarded_set=rewarded_set)

        if default_initial_state is None:
            # 75% of the ceiling: unrewarded under the default reward.
            default_initial_state = atleast_1d(0.75 * max_altitude)

        super(Hovership,
              self).__init__(dynamics=dynamics,
                             reward=reward,
                             default_initial_state=default_initial_state,
                             random_start=random_start,
                             reward_done_threshold=reward_done_threshold,
                             steps_done_threshold=steps_done_threshold)

        self.goal_state = goal_state
Example #8
0
 def __init__(self,
              gravity,
              mass,
              stiffness,
              resting_length,
              energy,
              failed=False,
              state_bounds=(0.0, 1),
              action_bounds=(-1/18*np.pi, 7/18*np.pi),
              shape=(200, 100)):
     """Initialize the SLIP (spring-loaded inverted pendulum) dynamics.

     The state-action space is the product box
     state_bounds x action_bounds, discretized on `shape`.

     :param gravity: physical parameter, stored as-is — semantics defined
         by the step function (not visible here)
     :param mass: physical parameter, stored as-is
     :param stiffness: spring stiffness, stored as-is
     :param resting_length: spring resting length, stored as-is
     :param energy: physical parameter, stored as-is
     :param failed: initial failure flag of the dynamics
     :param state_bounds: (low, high) of the state axis
     :param action_bounds: (low, high) of the action axis — presumably
         an angle range in radians, given the pi-based defaults
     :param shape: discretization of (state, action) axes
     """
     stateaction_space = StateActionSpace.from_product(
         Box([state_bounds[0], action_bounds[0]],
             [state_bounds[1], action_bounds[1]], shape)
     )
     super(SlipDynamics, self).__init__(stateaction_space)
     self.gravity = gravity
     self.mass = mass
     self.stiffness = stiffness
     self.resting_length = resting_length
     self.energy = energy
     # Bug fix: this was hard-coded to `False`, silently ignoring the
     # `failed` argument. Backward compatible: the default is False.
     self.failed = failed
Example #9
0
    def test_slicing(self):
        """Segment and Box support numpy-style __getitem__ slicing."""
        tolerance = 1e-7

        def assertClose(x, y):
            # Element-wise closeness check with an absolute tolerance.
            self.assertTrue(np.all(np.abs(x - y) < tolerance))

        # A 1-D segment of 10 points should match a linspace column vector.
        s = Segment(0, 1, 10)
        u = np.linspace(0, 1, 10).reshape((-1, 1))

        # Full slice returns the entire discretization.
        t = s[:]
        assertClose(t, u)

        # Strided slice. NOTE(review): 25:75 exceeds the 10 available
        # points, so both sides are empty — this presumably relies on
        # numpy-style out-of-range slice clipping; confirm against
        # Segment.__getitem__.
        t = s[25:75:2]
        assertClose(t, u[25:75:2])

        # A 2-D box should match an 'ij'-indexed meshgrid stacked along
        # the last axis (shape (2, 2, 2)).
        s = Box(0, 1, (2, 2))
        ux = np.linspace(0, 1, 2)
        uy = np.linspace(0, 1, 2)
        u = np.dstack(np.meshgrid(ux, uy, indexing='ij'))

        # Full 2-D slice.
        t = s[:, :]
        assertClose(t, u)

        # Slicing one axis with an integer drops that axis.
        t = s[0, :]
        assertClose(t, u[0, :])

        # A single integer index behaves like indexing the first axis.
        t = s[0]
        assertClose(t, u[0, :])

        t = s[:, 0]
        assertClose(t, u[:, 0])

        # Full integer indexing yields a single grid point.
        t = s[0, 1]
        assertClose(t, u[0, 1])

        # Indexing with a float array — presumably selects by continuous
        # coordinate rather than grid index; only the second column is
        # compared here. TODO confirm against Box.__getitem__.
        t = s[np.array([0.15]), :]
        assertClose(t[:, 1], uy)
Example #10
0
 def test_indexing_involution(self):
     """Indexing a Box with one of its own elements maps it to itself."""
     box = Box(0, 1, (2, 2))
     corner = box[0, 0]
     # Using the element itself as an index must return the same element.
     self.assertTrue((corner == box[box[0, 0]]).all())