def test_constant(self):
    space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
    reward = ConstantReward(space, 10)
    total = 0
    for t in range(10):
        s, a = space.get_tuple(space.sample())
        total += reward.get_reward(s, a, space.state_space.sample(), False)
    self.assertEqual(total, 100)
def test_subtraction(self):
    space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
    # `rewarded` should be a Subspace, but this is not implemented yet
    rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)
    r1 = ConstantReward(space, 1, rewarded_set=rewarded)
    r2 = ConstantReward(space, 1, rewarded_set=rewarded)
    reward = r1 - r2
    total = 0
    for t in range(100):
        s, a = space.get_tuple(space.sample())
        total += reward.get_reward(s, a, space.state_space.sample(), False)
    self.assertEqual(total, 0)
def test_unrewarded(self):
    space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
    # `rewarded` should be a Subspace, but this is not implemented yet
    rewarded = StateActionSpace(*Box([0, 0], [0.5, 0.5], (10, 10)).sets)
    unrewarded = StateActionSpace(*Box([0.5, 0.5], [1, 1], (10, 10)).sets)
    reward = ConstantReward(space, 10, unrewarded_set=unrewarded)
    total = 0
    for t in range(10):
        s, a = space.get_tuple(space.sample())
        sampled_space = rewarded.state_space if t % 2 == 0 \
            else unrewarded.state_space
        total += reward.get_reward(s, a, sampled_space.sample(), False)
    self.assertEqual(total, 50)
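# Illustrative sketch, not part of the original suite: if reward composition is
# pointwise, as the subtraction test above suggests, two full-space constant
# rewards should simply sum at every step. The name `test_addition` and the
# expected total are assumptions, not taken from the source.
def test_addition(self):
    space = StateActionSpace(*Box(0, 1, (10, 10)).sets)
    reward = ConstantReward(space, 3) + ConstantReward(space, 7)
    total = 0
    for t in range(10):
        s, a = space.get_tuple(space.sample())
        total += reward.get_reward(s, a, space.state_space.sample(), False)
    # 10 steps * (3 + 7) = 100, assuming `+` composes rewards pointwise
    self.assertEqual(total, 100)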
def __init__(self, penalty_level=100, dynamics_parameters=None):
    super(PenalizedSlip, self).__init__(dynamics_parameters=dynamics_parameters)

    def penalty_condition(state, action, new_state, reward):
        # The penalty fires whenever the transition lands in a failure state
        return self.is_failure_state(new_state)

    penalty = ConstantReward(self.reward.stateaction_space, -penalty_level,
                             reward_condition=penalty_condition)
    self.reward += penalty
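# Usage sketch (illustrative, not from the source): the penalty level is an
# arbitrary example value. A transition into a failure state then earns
# (base Slip reward - 50); every other transition keeps the base reward.
example_env = PenalizedSlip(penalty_level=50)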
def __init__(self, max_altitude):
    dynamics_parameters = {
        'ground_gravity': 2,
        'gravity_gradient': 0,
        'max_thrust': 1,
        'max_altitude': max_altitude,
        'minimum_gravity_altitude': 0,
        'maximum_gravity_altitude': 5
    }
    super(UnviableHovership, self).__init__(
        random_start=True,
        dynamics_parameters=dynamics_parameters,
        reward_done_threshold=None,
        steps_done_threshold=None,
    )

    def condition(state, action, new_state, reward):
        # Reward is only given when the current state is at altitude 1
        return state.squeeze() == 1

    zero_reward = ConstantReward(self.stateaction_space, 0)
    final_reward = ConstantReward(self.stateaction_space, 1.,
                                  reward_condition=condition)
    self.reward = zero_reward + final_reward
def __init__(self, random_start=False, default_initial_state=None,
             dynamics_parameters=None, reward=None,
             reward_done_threshold=None, steps_done_threshold=None,
             goal_state=False):
    if dynamics_parameters is None:
        dynamics_parameters = {}
    default_dynamics_parameters = {
        'ground_gravity': 0.1,
        'gravity_gradient': 1,
        'control_frequency': 1,
        'max_thrust': 0.8,
        'max_altitude': 2,
        'shape': (200, 150)
    }
    default_dynamics_parameters.update(dynamics_parameters)
    dynamics = HovershipDynamics(**default_dynamics_parameters)

    if reward is None:
        # The default reward gives a reward of 1 when the agent is above
        # 80% of the ceiling value
        max_altitude = default_dynamics_parameters['max_altitude']
        max_thrust = default_dynamics_parameters['max_thrust']
        rewarded_set = StateActionSpace.from_product(
            Box([0.8 * max_altitude, 0],
                [max_altitude, max_thrust],
                (100, 100))
        )
        reward = ConstantReward(dynamics.stateaction_space, constant=1,
                                rewarded_set=rewarded_set)

    if default_initial_state is None:
        # The default initial state is unrewarded with the default reward
        max_altitude = default_dynamics_parameters['max_altitude']
        default_initial_state = atleast_1d(0.75 * max_altitude)

    super(Hovership, self).__init__(
        dynamics=dynamics,
        reward=reward,
        default_initial_state=default_initial_state,
        random_start=random_start,
        reward_done_threshold=reward_done_threshold,
        steps_done_threshold=steps_done_threshold
    )
    self.goal_state = goal_state
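# Usage sketch (illustrative, not from the source): user-supplied
# dynamics_parameters are merged into the defaults above, so only the keys
# that differ need to be given. The values below are arbitrary examples.
example_hovership = Hovership(
    random_start=True,
    dynamics_parameters={'max_altitude': 3, 'shape': (100, 100)},
    steps_done_threshold=100,
)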
def __init__(self, random_start=False, default_initial_state=None,
             dynamics_parameters=None, reward_done_threshold=None,
             steps_done_threshold=None):
    if dynamics_parameters is None:
        dynamics_parameters = {}
    default_dynamics_parameters = {
        'gravity': 9.81,
        'mass': 80.0,
        'stiffness': 8200.0,
        'resting_length': 1.0,
        'energy': 1877.08,
        'shape': (200, 10)
    }
    default_dynamics_parameters.update(dynamics_parameters)
    dynamics = SlipDynamics(**default_dynamics_parameters)

    # A reward of 1 at every step, no matter what: this is equivalent to
    # "hop for as long as possible"
    reward = ConstantReward(dynamics.stateaction_space, constant=1)
    # Additionally incentivize going as fast as possible
    reward += AffineReward(dynamics.stateaction_space, [[1, 0], [0, 0]])

    if default_initial_state is None:
        # TODO change this so it uses stateaction wrappers
        # standing_energy = (default_dynamics_parameters['resting_length'] *
        #                    default_dynamics_parameters['mass'] *
        #                    default_dynamics_parameters['gravity'] /
        #                    default_dynamics_parameters['energy'])
        default_initial_state = atleast_1d(1.0)

    super(Slip, self).__init__(
        dynamics=dynamics,
        reward=reward,
        default_initial_state=default_initial_state,
        random_start=random_start,
        reward_done_threshold=reward_done_threshold,
        steps_done_threshold=steps_done_threshold
    )
def __init__(self, random_start=False, default_initial_state=None,
             dynamics_parameters=None, reward=None,
             reward_done_threshold=None, steps_done_threshold=None):
    if dynamics_parameters is None:
        dynamics_parameters = {}
    default_dynamics_parameters = {
        'ground_gravity': 1,
        'gravity_gradient': 1,
        'max_thrust': 5,
        'max_altitude': 10,
        'minimum_gravity_altitude': 9,
        'maximum_gravity_altitude': 3
    }
    default_dynamics_parameters.update(dynamics_parameters)
    dynamics = DiscreteHovershipDynamics(**default_dynamics_parameters)

    if reward is None:
        # The default reward is simply getting to the top
        max_altitude = default_dynamics_parameters['max_altitude']
        max_thrust = default_dynamics_parameters['max_thrust']
        rewarded_set = StateActionSpace(
            Discrete(n=1, start=max_altitude, end=max_altitude),
            Discrete(max_thrust + 1)
        )
        reward = ConstantReward(dynamics.stateaction_space, constant=1,
                                rewarded_set=rewarded_set)

    if default_initial_state is None:
        max_altitude = default_dynamics_parameters['max_altitude']
        default_initial_state = atleast_1d(max_altitude)

    super(DiscreteHovership, self).__init__(
        dynamics=dynamics,
        reward=reward,
        default_initial_state=default_initial_state,
        random_start=random_start,
        reward_done_threshold=reward_done_threshold,
        steps_done_threshold=steps_done_threshold
    )
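# Usage sketch (illustrative, not from the source): the reward can also be
# replaced after construction, mirroring what ToyHovership does below with
# self.reward. The constant and the overridden parameter are arbitrary
# example values.
example_discrete = DiscreteHovership(dynamics_parameters={'max_altitude': 5})
example_discrete.reward = ConstantReward(example_discrete.stateaction_space,
                                         constant=2)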
def __init__(self):
    dynamics_parameters = {
        'ground_gravity': 1,
        'gravity_gradient': 1,
        'max_thrust': 2,
        'max_altitude': 3,
        'maximum_gravity_altitude': 0,
        'minimum_gravity_altitude': 3
    }
    super(ToyHovership, self).__init__(
        default_initial_state=np.array([2.]),
        dynamics_parameters=dynamics_parameters
    )

    max_altitude = self.state_space.n - 1
    max_thrust = self.action_space.n - 1
    rewarded_set = StateActionSpace(
        Discrete(n=2, start=max_altitude - 1, end=max_altitude),
        Discrete(max_thrust + 1)
    )
    reward = ConstantReward(self.stateaction_space, constant=1,
                            rewarded_set=rewarded_set)
    self.reward = reward