Example 1
 def worker_reward_fn(states, goals, next_states):
     # `self.goal_indices` and `relative_goals` are not parameters here; in
     # the original source this function is nested inside a method and
     # resolves them from the surrounding scope.
     return negative_distance(states=states,
                              state_indices=self.goal_indices,
                              goals=goals,
                              next_states=next_states,
                              relative_context=relative_goals,
                              offset=0.0)
Example 2
 def intrinsic_reward_fn(states, goals, next_states):
     # `self.goal_indices`, `scale`, `relative_goals` and `offset` are resolved
     # from the surrounding scope in the original source. The goal-relevant
     # state components and the goals are rescaled by `scale` before the
     # distance is computed, and `offset` is added to the result afterwards.
     return negative_distance(
         states=states[self.goal_indices] / scale,
         goals=goals / scale,
         next_states=next_states[self.goal_indices] / scale,
         relative_context=relative_goals,
         offset=0.0) + offset
Example 3
 def contextual_reward(states, goals, next_states):
     # The goal covers the first three state dimensions; `REWARD_SCALE` is a
     # constant defined elsewhere in the original source.
     return negative_distance(states=states,
                              goals=goals,
                              next_states=next_states,
                              state_indices=[0, 1, 2],
                              relative_context=False,
                              offset=0.0,
                              reward_scales=REWARD_SCALE)
Example 4
 def contextual_reward(states, goals, next_states):
     # The goal covers the first two state dimensions; `np.exp` maps the
     # negative distance into a bounded reward in (0, 1].
     return negative_distance(states=states,
                              goals=goals,
                              next_states=next_states,
                              state_indices=[0, 1],
                              relative_context=False,
                              offset=0.0,
                              reward_scales=1 / 7.2,
                              output_activation=np.exp)
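Example 4 is the only call that passes an output activation. Because the raw reward is a non-positive scaled distance (the offset is 0.0), np.exp maps it into the interval (0, 1]; assuming reward_scales simply rescales the distance, the 1/7.2 factor sets the length scale at which the reward decays. A small numeric illustration of that assumption:

 import numpy as np

 # Hypothetical numbers: a planar distance of 7.2 between the first two
 # components of next_states and the goal, rescaled by 1/7.2, then
 # exponentiated as in Example 4.
 distance = 7.2
 scaled = distance * (1 / 7.2)   # -> 1.0
 reward = np.exp(-scaled)        # -> ~0.368, always in (0, 1]
 print(reward)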
Example 5
 def test_negative_distance(self):
     # From a unittest.TestCase: the goal-relevant components of the next
     # state `a` are a[[1, 2]] = [2, 10], and their Euclidean distance to the
     # goal b = [1, 2] is roughly sqrt(1 + 64) = sqrt(65).
     a = np.array([1, 2, 10])
     b = np.array([1, 2])
     c = negative_distance(b, b, a, goal_indices=[1, 2])
     self.assertEqual(c, -8.062257748304752)
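Taken together, the call sites above and the unit test pin down what negative_distance must do: select the goal-relevant components of next_states, measure their Euclidean distance to goals (optionally treating the goal as relative to the current state), apply scaling and an offset, and optionally pass the result through an output activation. The following is a minimal sketch consistent with those call sites, not the library's actual implementation; in particular, the epsilon added inside the square root is an assumption, chosen because it reproduces the value asserted in Example 5.

 import numpy as np


 def negative_distance(states,
                       goals,
                       next_states,
                       state_indices=None,
                       goal_indices=None,
                       relative_context=False,
                       offset=0.0,
                       reward_scales=1.0,
                       output_activation=None,
                       epsilon=1e-10):
     """Sketch: negative Euclidean distance between next_states and goals."""
     # Either keyword selects the goal-relevant state dimensions.
     indices = state_indices if state_indices is not None else goal_indices
     achieved = np.asarray(next_states, dtype=float)
     target = np.asarray(goals, dtype=float)
     if indices is not None:
         achieved = achieved[indices]
     if relative_context:
         # A relative goal is interpreted as an offset from the current state.
         current = np.asarray(states, dtype=float)
         if indices is not None:
             current = current[indices]
         target = current + target
     squared = np.sum(np.square((achieved - target) * reward_scales))
     # The epsilon term is assumed; it matches the expected value in Example 5.
     reward = -np.sqrt(squared + epsilon) + offset
     if output_activation is not None:
         reward = output_activation(reward)
     return reward

Under this sketch, the call in Example 5, negative_distance(b, b, a, goal_indices=[1, 2]), evaluates to -sqrt(65 + 1e-10), which is approximately -8.062257748304752.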