Example #1
    def test_constant(self):
        space = StateActionSpace(*Box(0, 1, (10, 10)).sets)

        reward = ConstantReward(space, 10)

        total = 0
        for t in range(10):
            s, a = space.get_tuple(space.sample())
            total += reward.get_reward(s, a, space.state_space.sample(), False)
        self.assertEqual(total, 100)
Example #2
    def test_substraction(self):
        space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
        # `rewarded` should be a Subspace, but this is not implemented yet
        rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)

        r1 = ConstantReward(space, 1, rewarded_set=rewarded)
        r2 = ConstantReward(space, 1, rewarded_set=rewarded)

        reward = r1 - r2

        total = 0
        for t in range(100):
            s, a = space.get_tuple(space.sample())
            total += reward.get_reward(s, a, space.state_space.sample(), False)
        self.assertEqual(total, 0)
Example #3
def get_spaces():
    state_space = Segment(0, 1, 101)
    action_space = Segment(-1, 2, 301)
    sa_space = StateActionSpace(
        state_space=state_space,
        action_space=action_space
    )
    return state_space, action_space, sa_space
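For reference, a minimal sketch (not part of the scraped example) of how the spaces returned by `get_spaces` might be exercised, assuming the Segment-based spaces support the same `sample` and `get_tuple` calls used in the test examples above:

    state_space, action_space, sa_space = get_spaces()
    # Draw a random state-action pair and split it into its two components
    s, a = sa_space.get_tuple(sa_space.sample())
    # The state space can also be sampled on its own
    next_s = state_space.sample()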
Example #4
    def __init__(self, ground_gravity, gravity_gradient, max_thrust,
                 max_altitude, minimum_gravity_altitude,
                 maximum_gravity_altitude):
        stateaction_space = StateActionSpace(Discrete(max_altitude + 1),
                                             Discrete(max_thrust + 1))
        super(DiscreteHovershipDynamics, self).__init__(stateaction_space)
        self.ground_gravity = ground_gravity
        self.gravity_gradient = gravity_gradient
        self.minimum_gravity_altitude = minimum_gravity_altitude
        self.maximum_gravity_altitude = maximum_gravity_altitude
Example #5
    def __init__(self,
                 ground_gravity,
                 gravity_gradient,
                 control_frequency,
                 max_thrust,
                 max_altitude,
                 shape=(200, 150)):
        stateaction_space = StateActionSpace.from_product(
            Box([0, 0], [max_altitude, max_thrust], shape))
        super(HovershipDynamics, self).__init__(stateaction_space)
        self.ground_gravity = ground_gravity
        self.gravity_gradient = gravity_gradient
        self.control_frequency = control_frequency
        self.ceiling_value = stateaction_space.state_space.high
        self.max_thrust = stateaction_space.action_space.high
Example #6

    def __init__(self, gym_env, shape=None, failure_critical=False,
                 control_frequency=None, **kwargs):
        self.gym_env = gym_env
        if shape is None:
            obs_shape = None
            action_shape = None
        if isinstance(gym_env.action_space, gspaces.Box):
            if shape is not None:
                action_space_ndim = get_index_length(gym_env.action_space)
                action_shape = shape[-action_space_ndim:]
            action_space = BoxWrapper(
                gym_env.action_space, discretization_shape=action_shape,
                **kwargs
            )
        elif isinstance(gym_env.action_space, gspaces.Discrete):
            action_space = DiscreteWrapper(gym_env.action_space)
        else:
            raise TypeError(f'Gym environment action_space is of type {type(gym_env.action_space)}, but only Box '
                            'and Discrete are currently supported')

        if isinstance(gym_env.observation_space, gspaces.Box):
            if shape is not None:
                state_space_ndim = get_index_length(gym_env.observation_space)
                obs_shape = shape[:state_space_ndim]
            state_space = BoxWrapper(
                gym_env.observation_space,
                discretization_shape=obs_shape,
                **kwargs
            )
        elif isinstance(gym_env.observation_space, gspaces.Discrete):
            state_space = DiscreteWrapper(gym_env.observation_space)
        else:
            raise TypeError(f'Gym environment observation_space is of type {type(gym_env.observation_space)}, but only '
                            'Box and Discrete are currently supported')

        self.info = {}
        self._done = False
        self.failure_critical = failure_critical
        self.control_frequency = control_frequency

        dynamics = DummyDynamics(StateActionSpace(state_space, action_space))

        super(GymEnvironmentWrapper, self).__init__(
            dynamics=dynamics,
            reward=None,
            default_initial_state=gym_env.reset(),
            random_start=True
        )
Example #7

    def __init__(self,
                 random_start=False,
                 default_initial_state=None,
                 dynamics_parameters=None,
                 reward=None,
                 reward_done_threshold=None,
                 steps_done_threshold=None,
                 goal_state=False):
        if dynamics_parameters is None:
            dynamics_parameters = {}
        default_dynamics_parameters = {
            'ground_gravity': 0.1,
            'gravity_gradient': 1,
            'control_frequency': 1,
            'max_thrust': 0.8,
            'max_altitude': 2,
            'shape': (200, 150)
        }
        default_dynamics_parameters.update(dynamics_parameters)
        dynamics = HovershipDynamics(**default_dynamics_parameters)

        if reward is None:
            # The default reward gives a 1 reward when the agent is above 80% of the ceiling value
            max_altitude = default_dynamics_parameters['max_altitude']
            max_thrust = default_dynamics_parameters['max_thrust']
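            # With the default parameters (max_altitude=2, max_thrust=0.8), the
            # rewarded set covers altitudes in [1.6, 2] for every thrust value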
            rewarded_set = StateActionSpace.from_product(
                Box([0.8 * max_altitude, 0], [max_altitude, max_thrust],
                    (100, 100)))
            reward = ConstantReward(dynamics.stateaction_space,
                                    constant=1,
                                    rewarded_set=rewarded_set)

        if default_initial_state is None:
            # The default initial state is unrewarded with the default reward
            max_altitude = default_dynamics_parameters['max_altitude']
            default_initial_state = atleast_1d(0.75 * max_altitude)

        super(Hovership,
              self).__init__(dynamics=dynamics,
                             reward=reward,
                             default_initial_state=default_initial_state,
                             random_start=random_start,
                             reward_done_threshold=reward_done_threshold,
                             steps_done_threshold=steps_done_threshold)

        self.goal_state = goal_state
Example #8

    def __init__(self,
                 random_start=False,
                 default_initial_state=None,
                 dynamics_parameters=None,
                 reward=None,
                 reward_done_threshold=None,
                 steps_done_threshold=None):
        if dynamics_parameters is None:
            dynamics_parameters = {}
        default_dynamics_parameters = {
            'ground_gravity': 1,
            'gravity_gradient': 1,
            'max_thrust': 5,
            'max_altitude': 10,
            'minimum_gravity_altitude': 9,
            'maximum_gravity_altitude': 3
        }
        default_dynamics_parameters.update(dynamics_parameters)
        dynamics = DiscreteHovershipDynamics(**default_dynamics_parameters)

        if reward is None:
            # The default reward is simply getting to the top
            max_altitude = default_dynamics_parameters['max_altitude']
            max_thrust = default_dynamics_parameters['max_thrust']
            rewarded_set = StateActionSpace(
                Discrete(n=1, start=max_altitude, end=max_altitude),
                Discrete(max_thrust + 1))
            reward = ConstantReward(dynamics.stateaction_space,
                                    constant=1,
                                    rewarded_set=rewarded_set)

        if default_initial_state is None:
            max_altitude = default_dynamics_parameters['max_altitude']
            default_initial_state = atleast_1d(max_altitude)

        super(DiscreteHovership,
              self).__init__(dynamics=dynamics,
                             reward=reward,
                             default_initial_state=default_initial_state,
                             random_start=random_start,
                             reward_done_threshold=reward_done_threshold,
                             steps_done_threshold=steps_done_threshold)
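For reference, a minimal instantiation sketch (not part of the scraped example); it only uses the constructor shown above, and the `max_altitude` override is purely illustrative:

    # Use the default discrete dynamics, except for a taller flight envelope
    env = DiscreteHovership(dynamics_parameters={'max_altitude': 5},
                            random_start=True)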
Example #9

    def __init__(self):
        dynamics_parameters = {
            'ground_gravity': 1,
            'gravity_gradient': 1,
            'max_thrust': 2,
            'max_altitude': 3,
            'maximum_gravity_altitude': 0,
            'minimum_gravity_altitude': 3
        }
        super(ToyHovership,
              self).__init__(default_initial_state=np.array([2.]),
                             dynamics_parameters=dynamics_parameters)
        max_altitude = self.state_space.n - 1
        max_thrust = self.action_space.n - 1
        rewarded_set = StateActionSpace(
            Discrete(n=2, start=max_altitude - 1, end=max_altitude),
            Discrete(max_thrust + 1))
        reward = ConstantReward(self.stateaction_space,
                                constant=1,
                                rewarded_set=rewarded_set)
        self.reward = reward
Example #10
File: slip.py Project: sheim/edge
    def __init__(self,
                 gravity,
                 mass,
                 stiffness,
                 resting_length,
                 energy,
                 failed=False,
                 state_bounds=(0.0, 1),
                 action_bounds=(-1/18*np.pi, 7/18*np.pi),
                 shape=(200, 100)):
        stateaction_space = StateActionSpace.from_product(
            Box([state_bounds[0], action_bounds[0]],
                [state_bounds[1], action_bounds[1]], shape)
        )
        super(SlipDynamics, self).__init__(stateaction_space)
        self.gravity = gravity
        self.mass = mass
        self.stiffness = stiffness
        self.resting_length = resting_length
        self.energy = energy
        self.failed = failed
Example #11
    def test_unrewarded(self):
        space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
        # `rewarded` should be a Subspace, but this is not implemented yet
        rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)
        unrewarded = StateActionSpace(*Box([0.5, 0.5], [1, 1], (10, 10)).sets)

        reward = ConstantReward(space, 10, unrewarded_set=unrewarded)

        total = 0
        for t in range(10):
            s, a = space.get_tuple(space.sample())
            sampled_space = rewarded.state_space if t % 2 == 0 \
                else unrewarded.state_space

            total += reward.get_reward(s, a, sampled_space.sample(), False)
        self.assertEqual(total, 50)
Example #12
    def from_vibly_file(self, vibly_file_path):
        """
        Loads the ground truth from a vibly file
        Important note: for this method to work, you need to define the lookup dictionary in
        utils.vibly_compatibility_utils for your environment, if it is not done by default.
        :param vibly_file_path: str or Path: the file from where the ground truth should be loaded
        """
        vibly_file_path = Path(vibly_file_path)
        with vibly_file_path.open('rb') as f:
            data = pkl.load(f)
        # `data` is a dictionary with: data.keys() = ['grids', 'Q_map', 'Q_F', 'Q_V', 'Q_M', 'S_M', 'p', 'x0']
        states = data['grids']['states']
        actions = data['grids']['actions']

        if len(states) != self.env.state_space.index_dim:
            raise IndexError('Size mismatch: expected state space with '
                             f'{self.env.state_space.index_dim} dimensions, '
                             f'got {len(states)}')
        if len(actions) != self.env.action_space.index_dim:
            raise IndexError('Size mismatch: expected action space with '
                             f'{self.env.action_space.index_dim} dimensions, '
                             f'got {len(actions)}')

        def get_product_space(np_components):
            segments = [
                Segment(comp[0], comp[-1], comp.shape[0])
                for comp in np_components
            ]
            product_space = ProductSpace(*segments)
            return product_space

        # Vibly stores grids lazily: each dimension is stored independently, and
        # meshgrids are only built when required. Our StateActionSpace structure
        # lets us use an efficient ProductSpace instead.
        state_space = get_product_space(states)
        action_space = get_product_space(actions)
        self.stateaction_space = StateActionSpace(state_space, action_space)

        self.state_measure = data['S_M']
        self.measure_value = data['Q_M']
        self.viable_set = data['Q_V']
        self.failure_set = data['Q_F']
        self.unviable_set = ~self.failure_set
        self.unviable_set[self.viable_set] = False

        # TODO get rid of that
        self.viable_set = self.viable_set.astype(float)
        self.failure_set = self.failure_set.astype(float)
        self.unviable_set = self.unviable_set.astype(float)

        # This fails if the lookup dictionary is not defined in utils.vibly_compatibility_utils
        lookup_dictionary = get_parameters_lookup_dictionary(self.env)

        # We refuse to load a SafetyTruth if the parameters defined for the
        # current environment do not exactly match the ones used to compute the
        # ground truth. Loading it anyway would be possible, but could lead to
        # errors that are very hard to debug.
        for vibly_pname, vibly_value in data['p'].items():
            pname = lookup_dictionary[vibly_pname]
            if pname is not None:
                value = self.env.dynamics.parameters[pname]
                if value != vibly_value:
                    raise ValueError(
                        f'Value mismatch: value loaded for {pname} from '
                        f'{vibly_pname} does not match. Expected {value}, '
                        f'got {vibly_value}')
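For context, a hedged usage sketch; the `SafetyTruth` constructor and the file path below are assumptions made for illustration, and only the `from_vibly_file` call itself comes from the code above:

    # Hypothetical: build the ground-truth object around an environment
    truth = SafetyTruth(env)
    # Load a vibly ground-truth file (illustrative path)
    truth.from_vibly_file('path/to/ground_truth.pkl')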