# Worker reward: negative distance between the goal-relevant state
# dimensions and the worker's assigned goal. `self.goal_indices` and
# `relative_goals` come from the enclosing scope.
def worker_reward_fn(states, goals, next_states):
    return negative_distance(
        states=states,
        state_indices=self.goal_indices,
        goals=goals,
        next_states=next_states,
        relative_context=relative_goals,
        offset=0.0,
    )
# Intrinsic reward: the goal-relevant state dimensions are normalized by
# `scale` before the distance is computed, and an `offset` from the
# enclosing scope is added to the result.
def intrinsic_reward_fn(states, goals, next_states):
    return negative_distance(
        states=states[self.goal_indices] / scale,
        goals=goals / scale,
        next_states=next_states[self.goal_indices] / scale,
        relative_context=relative_goals,
        offset=0.0,
    ) + offset
# Environment reward: negative distance between the first three state
# dimensions and the goal, scaled by REWARD_SCALE.
def contextual_reward(states, goals, next_states):
    return negative_distance(
        states=states,
        goals=goals,
        next_states=next_states,
        state_indices=[0, 1, 2],
        relative_context=False,
        offset=0.0,
        reward_scales=REWARD_SCALE,
    )
# Environment reward variant: the distance over the first two state
# dimensions is scaled by 1/7.2 and passed through np.exp, yielding a
# shaped reward in (0, 1] that peaks when the goal is reached.
def contextual_reward(states, goals, next_states):
    return negative_distance(
        states=states,
        goals=goals,
        next_states=next_states,
        state_indices=[0, 1],
        relative_context=False,
        offset=0.0,
        reward_scales=1 / 7.2,
        output_activation=np.exp,
    )
def test_negative_distance(self):
    a = np.array([1, 2, 10])
    b = np.array([1, 2])
    # The expected value matches -sqrt(65 + 1e-10): the squared distance
    # between [2, 10] and [1, 2] is 1 + 64 = 65, plus a small stability
    # epsilon inside the square root.
    c = negative_distance(b, b, a, goal_indices=[1, 2])
    self.assertEqual(c, -8.062257748304752)
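
# For reference, a minimal sketch of what `negative_distance` computes, as
# implied by the call sites and the unit test above. The real library
# function (its exact argument order, batch handling, and the composition
# of scaling, offset, and activation) may differ: the positional order
# (states, next_states, goals) and the 1e-10 stability epsilon are inferred
# here from the expected test value -sqrt(65 + 1e-10).
import numpy as np


def negative_distance(states,
                      next_states,
                      goals,
                      state_indices=None,
                      goal_indices=None,
                      relative_context=False,
                      offset=0.0,
                      reward_scales=1.0,
                      output_activation=None,
                      epsilon=1e-10):
    """Return the negative Euclidean distance from next_states to goals.

    Unbatched sketch: all inputs are 1-D numpy arrays.
    """
    # Restrict the comparison to the requested dimensions, if any.
    if state_indices is not None:
        states = states[state_indices]
        next_states = next_states[state_indices]
    if goal_indices is not None:
        goals = goals[goal_indices]

    # Relative goals are interpreted as offsets from the current state.
    if relative_context:
        goals = states + goals

    # Euclidean distance, with a small epsilon inside the square root for
    # numerical stability.
    dist = np.sqrt(np.sum(np.square(next_states - goals)) + epsilon)

    # Scale, negate, offset, and optionally squash the result (assumed
    # composition order).
    reward = -reward_scales * dist + offset
    return output_activation(reward) if output_activation is not None else reward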