Esempio n. 1
0
 def make_box(scalar_range_list, dtype, defaults):
     """Build a Box from a list of scalar ranges.

     Each entry of ``scalar_range_list`` is converted with
     ``scalar_range2tuple(entry, defaults)``. Element 0 of every converted
     result forms the ``low`` array and element 1 forms the ``high`` array;
     both arrays and the Box itself are created with ``dtype``.
     """
     limits = [scalar_range2tuple(entry, defaults) for entry in scalar_range_list]
     lower = np.array([pair[0] for pair in limits], dtype=dtype)
     upper = np.array([pair[1] for pair in limits], dtype=dtype)
     return Box(low=lower, high=upper, dtype=dtype)
Esempio n. 2
0
 def __init__(self, index: int, spec: RewardSpace):
     """Populate this object from a reward space specification.

     Args:
         index: Stored unchanged as ``self.index``.
         spec: Source of the name, range, success threshold, and the
             ``deterministic`` / ``platform_dependent`` flags.
     """
     self.id = spec.name
     self.index = index
     self.range = scalar_range2tuple(spec.range)
     # The threshold is only meaningful when the spec says it has one;
     # otherwise it is recorded as None.
     if spec.has_success_threshold:
         self.success_threshold = spec.success_threshold
     else:
         self.success_threshold = None
     self.deterministic = spec.deterministic
     self.platform_dependent = spec.platform_dependent
Esempio n. 3
0
    def __init__(
        self,
        get_reward: Callable[[RewardRequest], Reward],
        spaces: Sequence[RewardSpace],
    ):
        """Store the reward callback and index the given reward spaces by name.

        Args:
            get_reward: Callable kept as ``self._get_reward``.
            spaces: Reward spaces to index; must be non-empty.

        Raises:
            ValueError: If ``spaces`` is empty.
        """
        self._get_reward = get_reward
        self.session_id = -1

        if not spaces:
            raise ValueError("No reward spaces")

        # Map each space name to its position in ``spaces``. A repeated
        # name keeps the position of its last occurrence.
        name_to_index = {}
        for position, space in enumerate(spaces):
            name_to_index[space.name] = position
        self.indices = name_to_index

        # Map each space name to its converted range tuple.
        name_to_range = {}
        for space in spaces:
            key = space.name
            name_to_range[key] = scalar_range2tuple(space.range)
        self.ranges = name_to_range
 def make_seq(scalar_range, dtype, defaults):
     """Build a Sequence whose size range comes from ``scalar_range``.

     The size range is ``scalar_range2tuple(scalar_range, defaults)``.
     ``proto`` is not a parameter of this function: its
     ``opaque_data_format`` attribute is read from the surrounding scope.
     """
     bounds = scalar_range2tuple(scalar_range, defaults)
     data_format = proto.opaque_data_format
     return Sequence(
         size_range=bounds,
         dtype=dtype,
         opaque_data_format=data_format,
     )